Merge remote-tracking branch 'remotes/origin/master' into guoguo/linuxBuildFix

This commit is contained in:
Guoguo Chen 2015-10-07 18:19:11 -04:00
Родитель 1c93b56ccc baee724391
Коммит b415924234
189 изменённых файлов: 20179 добавлений и 51663 удалений

Просмотреть файл

@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2013
VisualStudioVersion = 12.0.31101.0
VisualStudioVersion = 12.0.21005.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKMathDll", "Math\Math\Math.vcxproj", "{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}"
ProjectSection(ProjectDependencies) = postProject
@ -204,6 +204,7 @@ EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKComputationNetworkLib", "MachineLearning\CNTKComputationNetworkLib\CNTKComputationNetworkLib.vcxproj", "{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKSGDLib", "MachineLearning\CNTKSGDLib\CNTKSGDLib.vcxproj", "{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}"
@ -350,127 +351,85 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FullUtterance", "FullUttera
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|Win32.ActiveCfg = Debug|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.ActiveCfg = Debug|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.Build.0 = Debug|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|Win32.ActiveCfg = Release|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.ActiveCfg = Release|x64
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.Build.0 = Release|x64
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|Win32.ActiveCfg = Debug|x64
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|x64.ActiveCfg = Debug|x64
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|x64.Build.0 = Debug|x64
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|Win32.ActiveCfg = Release|x64
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.ActiveCfg = Release|x64
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.Build.0 = Release|x64
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|Win32.ActiveCfg = Debug|x64
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|x64.ActiveCfg = Debug|x64
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|x64.Build.0 = Debug|x64
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|Win32.ActiveCfg = Release|x64
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|x64.ActiveCfg = Release|x64
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|x64.Build.0 = Release|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|Win32.ActiveCfg = Debug|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.ActiveCfg = Debug|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.Build.0 = Debug|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|Win32.ActiveCfg = Release|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.ActiveCfg = Release|x64
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.Build.0 = Release|x64
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|Win32.ActiveCfg = Debug|x64
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|x64.ActiveCfg = Debug|x64
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|x64.Build.0 = Debug|x64
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Release|Win32.ActiveCfg = Release|x64
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Release|x64.ActiveCfg = Release|x64
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|Win32.ActiveCfg = Debug|x64
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|x64.ActiveCfg = Debug|x64
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|x64.Build.0 = Debug|x64
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|Win32.ActiveCfg = Release|x64
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|x64.ActiveCfg = Release|x64
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|x64.Build.0 = Release|x64
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|Win32.ActiveCfg = Debug|x64
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|x64.ActiveCfg = Debug|x64
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|x64.Build.0 = Debug|x64
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|Win32.ActiveCfg = Release|x64
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|x64.ActiveCfg = Release|x64
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|x64.Build.0 = Release|x64
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|Win32.ActiveCfg = Debug|x64
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|x64.ActiveCfg = Debug|x64
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|x64.Build.0 = Debug|x64
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|Win32.ActiveCfg = Release|x64
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|x64.ActiveCfg = Release|x64
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|x64.Build.0 = Release|x64
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|Win32.ActiveCfg = Debug|x64
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|x64.ActiveCfg = Debug|x64
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|x64.Build.0 = Debug|x64
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|Win32.ActiveCfg = Release|x64
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|x64.ActiveCfg = Release|x64
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|x64.Build.0 = Release|x64
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|Win32.ActiveCfg = Debug|x64
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|x64.ActiveCfg = Debug|x64
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|x64.Build.0 = Debug|x64
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|Win32.ActiveCfg = Release|x64
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|x64.ActiveCfg = Release|x64
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|x64.Build.0 = Release|x64
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Debug|Win32.ActiveCfg = Debug|x64
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Debug|x64.ActiveCfg = Debug|x64
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Debug|x64.Build.0 = Debug|x64
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Release|Win32.ActiveCfg = Release|x64
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Release|x64.ActiveCfg = Release|x64
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Release|x64.Build.0 = Release|x64
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|Win32.ActiveCfg = Debug|x64
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|x64.ActiveCfg = Debug|x64
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|x64.Build.0 = Debug|x64
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|Win32.ActiveCfg = Release|x64
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|x64.ActiveCfg = Release|x64
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|x64.Build.0 = Release|x64
{014DA766-B37B-4581-BC26-963EA5507931}.Debug|Win32.ActiveCfg = Debug|x64
{014DA766-B37B-4581-BC26-963EA5507931}.Debug|x64.ActiveCfg = Debug|x64
{014DA766-B37B-4581-BC26-963EA5507931}.Debug|x64.Build.0 = Debug|x64
{014DA766-B37B-4581-BC26-963EA5507931}.Release|Win32.ActiveCfg = Release|x64
{014DA766-B37B-4581-BC26-963EA5507931}.Release|x64.ActiveCfg = Release|x64
{014DA766-B37B-4581-BC26-963EA5507931}.Release|x64.Build.0 = Release|x64
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Debug|Win32.ActiveCfg = Debug|x64
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Debug|x64.ActiveCfg = Debug|x64
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Debug|x64.Build.0 = Debug|x64
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Release|Win32.ActiveCfg = Release|x64
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Release|x64.ActiveCfg = Release|x64
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Release|x64.Build.0 = Release|x64
{DBB3C106-B0B4-4059-8477-C89528CEC1B0}.Debug|Win32.ActiveCfg = Debug|x64
{DBB3C106-B0B4-4059-8477-C89528CEC1B0}.Debug|x64.ActiveCfg = Debug|x64
{DBB3C106-B0B4-4059-8477-C89528CEC1B0}.Release|Win32.ActiveCfg = Release|x64
{DBB3C106-B0B4-4059-8477-C89528CEC1B0}.Release|x64.ActiveCfg = Release|x64
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Debug|Win32.ActiveCfg = Debug|x64
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Debug|x64.ActiveCfg = Debug|x64
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Debug|x64.Build.0 = Debug|x64
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Release|Win32.ActiveCfg = Release|x64
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Release|x64.ActiveCfg = Release|x64
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Release|x64.Build.0 = Release|x64
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Debug|Win32.ActiveCfg = Debug|x64
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Debug|x64.ActiveCfg = Debug|x64
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Debug|x64.Build.0 = Debug|x64
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Release|Win32.ActiveCfg = Release|x64
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Release|x64.ActiveCfg = Release|x64
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Release|x64.Build.0 = Release|x64
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Debug|Win32.ActiveCfg = Debug|x64
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Debug|x64.ActiveCfg = Debug|x64
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Debug|x64.Build.0 = Debug|x64
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Release|Win32.ActiveCfg = Release|x64
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Release|x64.ActiveCfg = Release|x64
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Release|x64.Build.0 = Release|x64
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Debug|Win32.ActiveCfg = Debug|x64
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Debug|x64.ActiveCfg = Debug|x64
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Debug|x64.Build.0 = Debug|x64
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Release|Win32.ActiveCfg = Release|x64
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Release|x64.ActiveCfg = Release|x64
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Release|x64.Build.0 = Release|x64
{EAD17188-072C-4726-B840-A769C36DAD1B}.Debug|Win32.ActiveCfg = Debug|x64
{EAD17188-072C-4726-B840-A769C36DAD1B}.Debug|x64.ActiveCfg = Debug|x64
{EAD17188-072C-4726-B840-A769C36DAD1B}.Debug|x64.Build.0 = Debug|x64
{EAD17188-072C-4726-B840-A769C36DAD1B}.Release|Win32.ActiveCfg = Release|x64
{EAD17188-072C-4726-B840-A769C36DAD1B}.Release|x64.ActiveCfg = Release|x64
{EAD17188-072C-4726-B840-A769C36DAD1B}.Release|x64.Build.0 = Release|x64
EndGlobalSection
@ -478,46 +437,46 @@ Global
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{6CEE834A-8104-46A8-8902-64C81BD7928F} = {D45DF403-6781-444E-B654-A96868C5BE68}
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976} = {D45DF403-6781-444E-B654-A96868C5BE68}
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68}
{DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68}
{C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} = {D45DF403-6781-444E-B654-A96868C5BE68}
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B} = {D45DF403-6781-444E-B654-A96868C5BE68}
{5E666C53-2D82-49C9-9127-3FDDC321C741} = {D45DF403-6781-444E-B654-A96868C5BE68}
{E6646FFE-3588-4276-8A15-8D65C22711C1} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{1D5787D4-52E4-45DB-951B-82F220EE0C6A} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68}
{B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{9A2F2441-5972-4EA8-9215-4119FCE0FB68} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{014DA766-B37B-4581-BC26-963EA5507931} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{D667AF32-028A-4A5D-BE19-F46776F0F6B2} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{3ED0465D-23E7-4855-9694-F788717B6533} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{065AF55D-AF02-448B-BFCD-52619FDA4BD0} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{3ED0465D-23E7-4855-9694-F788717B6533} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{3E9C89B1-C045-4F42-92B2-F9FFFFC2DBD4} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{C70E1572-20FF-496C-A0A9-10AA6755A07C} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{98D2C32B-0C1F-4E19-A626-65F7BA4600CF} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
{EA67F51F-1FE8-462D-9F3E-01161685AD59} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
{DE1A06BA-EC5C-4E0D-BCA8-3EA555310C58} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
{63024704-A2D7-497E-AD4B-5C10C6AA1374} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
{F9BEB27E-8AF5-464E-8D45-0000D5AFA2D3} = {EA67F51F-1FE8-462D-9F3E-01161685AD59}
{889C1CCF-92B3-450B-B00D-FC9A9D5BE464} = {EA67F51F-1FE8-462D-9F3E-01161685AD59}
{DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68}
{CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94}
{C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} = {D45DF403-6781-444E-B654-A96868C5BE68}
{4BBF2950-3DBD-469A-AD57-6CACBEBAF541} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
{5F733BBA-FE83-4668-8F83-8B0E78A36619} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
{19EE975B-232D-49F0-94C7-6F1C6424FB53} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B} = {D45DF403-6781-444E-B654-A96868C5BE68}
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{5E666C53-2D82-49C9-9127-3FDDC321C741} = {D45DF403-6781-444E-B654-A96868C5BE68}
{88F85A64-105D-4CDA-8199-B7A312FC8A27} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
{8241108A-7824-4FF2-BECA-7521A9D89DCF} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
{6D1353D6-F196-466F-B886-F16D48759B20} = {5E666C53-2D82-49C9-9127-3FDDC321C741}
{B6725C9F-A6D2-4269-9B74-7888A90F7884} = {5E666C53-2D82-49C9-9127-3FDDC321C741}
{B27DD434-EECD-4EE0-A03B-1150EB87258E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884}
{A4884465-CFBB-4A64-A9DE-690E1A63EF7E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884}
{3E9C89B1-C045-4F42-92B2-F9FFFFC2DBD4} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{C70E1572-20FF-496C-A0A9-10AA6755A07C} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{88F85A64-105D-4CDA-8199-B7A312FC8A27} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
{8241108A-7824-4FF2-BECA-7521A9D89DCF} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
EndGlobalSection
EndGlobal

Просмотреть файл

@ -102,6 +102,14 @@ void Eval<ElemType>::GetNodeDimensions(std::map<std::wstring, size_t>& dimension
m_eval->GetNodeDimensions(dimensions, nodeGroup);
}
// StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
// outputNodeName - name of node that will be evaluated
// This wrapper simply forwards the request to the underlying evaluator held in m_eval.
template<class ElemType>
void Eval<ElemType>::StartEvaluateMinibatchLoop(const std::wstring & outputNodeName)
{
m_eval->StartEvaluateMinibatchLoop(outputNodeName);
}
// Evaluate - Evaluate using the model with the given inputs and outputs
// inputs - map from node name to input vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
@ -122,4 +130,4 @@ void Eval<ElemType>::ResetState()
template class Eval<double>;
template class Eval<float>;
}}}
}}}

Просмотреть файл

@ -47,7 +47,8 @@ public:
virtual void LoadModel(const std::wstring& modelFileName)=0;
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup)=0;
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs)=0;
virtual void StartEvaluateMinibatchLoop(const std::wstring & outputNodeName) = 0;
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs) = 0;
virtual void ResetState() = 0;
};
@ -92,6 +93,10 @@ public:
// nodeGroup - type of node we are requesting (input/output/specified)
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup);
// StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
// outputNodeName - name of node that will be evaluated
virtual void StartEvaluateMinibatchLoop(const std::wstring & outputNodeName);
// Evaluate - Evaluate using the model with the given inputs and outputs
// inputs - map from node name to input vector
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation

Просмотреть файл

@ -1320,15 +1320,16 @@ class argvector: public std::vector<T>
}
// convert wstring toks2[0] to T val and check type
static void parse(const std::wstring& in, size_t& val)
template<typename INT>
static void parseint(const std::wstring& in, INT& val)
{
float fval = (float) msra::strfun::todouble(in);
val = (size_t) fval;
if (val != fval)
{
double dval = msra::strfun::todouble(in);
val = (INT)dval;
if (val != dval)
RuntimeError("argvector: invalid arg value");
}
}
static void parse(const std::wstring& in, size_t& val) { parseint(in, val); }
static void parse(const std::wstring& in, int& val) { parseint(in, val); }
static void parse(const std::wstring& in, std::wstring& val)
{
val = in;

Просмотреть файл

@ -984,8 +984,9 @@ public:
void getedgeacscores (std::vector<float> & edgeacscores);
void getedgealignments (std::vector<unsigned short> & edgealignments);
//to work with CNTK's GPU memory
void setmode(bool cpumode/*, size_t DeviceId*/);
void release(bool cpumode);
void setdevice(size_t DeviceId);
size_t getdevice();
void release(bool cpumode);
void setloglls(const Microsoft::MSR::CNTK::Matrix<float>& loglls);
void setloglls(const Microsoft::MSR::CNTK::Matrix<double>& loglls);
void getgamma(Microsoft::MSR::CNTK::Matrix<float>& loglls);

Просмотреть файл

@ -13,6 +13,7 @@
#include <string> // for the error message in checkoverflow() only
#include <stdexcept>
#include <stdint.h>
#include <cstdio>
#undef INITIAL_STRANGE // [v-hansu] initialize structs to strange values
#define PARALLEL_SIL // [v-hansu] process sil on CUDA, used in other files, please search this
@ -25,7 +26,12 @@ static void checkoverflow (size_t fieldval, size_t targetval, const char * field
if (fieldval != targetval)
{
char buf[1000];
sprintf(buf, "lattice: bit field %s too small for value 0x%x (cut from 0x%x)", fieldname, (unsigned int)targetval, (unsigned int)fieldval);
#if defined(_MSC_VER) && _MSC_VER < 1900
sprintf_s
#else
std::snprintf
#endif
(buf, sizeof(buf), "lattice: bit field %s too small for value 0x%x (cut from 0x%x)", fieldname, (unsigned int)targetval, (unsigned int)fieldval);
throw std::runtime_error (buf);
}
}

Просмотреть файл

@ -1,108 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: AcceleratorManager.h
//
// summary: Declares the accelerator manager class
///-------------------------------------------------------------------------------------------------
#pragma once
#include <deque>
#include <vector>
#include <set>
#include "accelerator.h"
#include "PhysicalDevice.h"
#include "Lockable.h"
#include <map>
namespace PTask {
class Task;
///-------------------------------------------------------------------------------------------------
/// <summary> Accelerator manager. Derives from Lockable and keeps three lists of
///           PhysicalDevice pointers (all devices, available devices, in-flight devices),
///           exposing methods to add a device and to query availability and device count.
///           NOTE(review): how devices move between the lists is defined in the
///           implementation file, which is not visible here. </summary>
///-------------------------------------------------------------------------------------------------
class AcceleratorManager : public Lockable
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///-------------------------------------------------------------------------------------------------
AcceleratorManager();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Virtual, so derived managers can be destroyed through a base pointer. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~AcceleratorManager(void);
///-------------------------------------------------------------------------------------------------
/// <summary> Adds a device. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator to add. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddDevice(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if 'pAccelerator' is available. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator to query. </param>
///
/// <returns> true if available, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsAvailable(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Searches for available accelerators of the given class. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="cls"> The accelerator class to search for. </param>
/// <param name="v"> [in,out] Vector of accelerator pointers; presumably receives the
///                  accelerators that were found (TODO confirm against the implementation). </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL FindAvailable(ACCELERATOR_CLASS cls, std::vector<Accelerator*> &v);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the physical accelerator count. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> The physical accelerator count. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetPhysicalAcceleratorCount();
protected:
/// <summary> The devices </summary>
std::vector<PhysicalDevice*> m_devices;
/// <summary> The available devices </summary>
std::vector<PhysicalDevice*> m_available;
/// <summary> The inflight devices </summary>
std::vector<PhysicalDevice*> m_inflight;
///-------------------------------------------------------------------------------------------------
/// <summary> Searches for the first match for the given accelerator*. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator to look up. </param>
///
/// <returns> null if it fails, else the matching PhysicalDevice. </returns>
///-------------------------------------------------------------------------------------------------
PhysicalDevice * Find(Accelerator* pAccelerator);
};
};

Просмотреть файл

@ -1,633 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: AsyncContext.h
//
// summary: Declares the asynchronous context class
///-------------------------------------------------------------------------------------------------
#ifndef __ASYNC_CONTEXT_H__
#define __ASYNC_CONTEXT_H__
#include <stdio.h>
#include <crtdbg.h>
#include <deque>
#include <set>
#include "ReferenceCounted.h"
namespace PTask {
class Task;
class SyncPoint;
class AsyncDependence;
class Accelerator;
class AsyncContext : public ReferenceCounted {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pDeviceContext"> [in] non-null, context for the device. </param>
/// <param name="pTaskContext"> [in] non-null, context for the task. </param>
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
///-------------------------------------------------------------------------------------------------
AsyncContext(
__in Accelerator * pDeviceContext,
__in Task * pTaskContext,
__in ASYNCCONTEXTTYPE eAsyncContextType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~AsyncContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a dependence on the synchronization point. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <returns> null if it fails, else the created AsyncDependence. </returns>
///-------------------------------------------------------------------------------------------------
virtual AsyncDependence *
CreateDependence(
__in ASYNCHRONOUS_OPTYPE eOperationType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a dependence on the synchronization point. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <returns> null if it fails, else the created AsyncDependence. </returns>
///-------------------------------------------------------------------------------------------------
virtual AsyncDependence *
CreateDependence(
__in SyncPoint * pSyncPoint,
__in ASYNCHRONOUS_OPTYPE eOperationType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a synchronization point. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> null if it fails, else the created SyncPoint. </returns>
///-------------------------------------------------------------------------------------------------
virtual SyncPoint * CreateSyncPoint(void * pPSSyncObject);
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys a synchronization point. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> BOOL status; presumably true if it succeeds, false if it fails (confirm against the implementation). </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL DestroySyncPoint(SyncPoint * pSyncPoint);
///-------------------------------------------------------------------------------------------------
/// <summary> Synchronizes the context. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SynchronizeContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Notifies the device synchronized. </summary>
///
/// <remarks> crossbac, 7/8/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void NotifyDeviceSynchronized();
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence--asynchronous; puts a fence in the command queue
/// for this context. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pDependence"> [in,out] If non-null, the dependence. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
OrderSubsequentOperationsAfter(
__in AsyncDependence * pDependence
);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence--synchronous </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pDependence"> [in,out] If non-null, the dependence. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
SynchronousWait(
__in AsyncDependence * pDependence
);
///-------------------------------------------------------------------------------------------------
/// <summary> Synchronous wait for dependence resolution. </summary>
///
/// <remarks> crossbac, 6/25/2013. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
SynchronousWait(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if 'pDependence' is dependence resolved. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pDependence"> [in,out] If non-null, the dependence. </param>
///
/// <returns> true if dependence resolved, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
QueryDependenceOutstanding(
__in AsyncDependence * pDependence
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the device context. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> null if it fails, else the device context. </returns>
///-------------------------------------------------------------------------------------------------
virtual Accelerator * GetDeviceContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the task context. </summary>
///
/// <remarks> Crossbac, 7/13/2012. </remarks>
///
/// <returns> null if it fails, else the task context. </returns>
///-------------------------------------------------------------------------------------------------
virtual Task * GetTaskContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the platform context object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> null if it fails, else the platform context object. </returns>
///-------------------------------------------------------------------------------------------------
virtual void * GetPlatformContextObject()=0;
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes this object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Initialize()=0;
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator backing this context encapsulates a backend
/// framework that provides explicit APIs for managing outstanding (Asynchronous)
/// operations. When this is the case, the corresponding AsyncContext subclass can
/// manage outstanding dependences explicitly to increase concurrency and avoid
/// syncing with the device. When it is *not* the case, we must synchronize when we
/// move data between this accelerator context and contexts that *do* support an
/// explicit async API. For example, CUDA supports the stream and event API to
/// explicitly manage dependences and we use this feature heavily to allow task
/// dispatch to get far ahead of device- side dispatch. However when data moves
/// between CUAccelerators and other accelerator classes, we must use synchronous
/// operations or provide a way to wait for outstanding dependences from those
/// contexts to resolve. This method is used to tell us whether we can create an
/// outstanding dependence after making calls that queue work, or whether we need to
/// synchronize.
///
/// This method simply calls the method of the same name on the (device context)
/// accelerator, and is only provided for convenience.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsExplicitAsyncOperations();
///-------------------------------------------------------------------------------------------------
/// <summary>   Locks the accelerator. Returns nothing. </summary>
///
/// <remarks>   crossbac, 6/26/2013.
///             NOTE(review): presumably this locks the device-context accelerator
///             (m_pDeviceContext) and must be balanced by a call to UnlockAccelerator() --
///             confirm against the implementation. </remarks>
///-------------------------------------------------------------------------------------------------
virtual VOID LockAccelerator();
///-------------------------------------------------------------------------------------------------
/// <summary>   Unlocks the accelerator. Returns nothing. </summary>
///
/// <remarks>   crossbac, 6/26/2013.
///             NOTE(review): presumably this releases the lock taken by LockAccelerator()
///             on the device-context accelerator (m_pDeviceContext) -- confirm against the
///             implementation. </remarks>
///-------------------------------------------------------------------------------------------------
virtual VOID UnlockAccelerator();
///-------------------------------------------------------------------------------------------------
/// <summary>   Non-blocking check whether the dependence is still outstanding. </summary>
///
/// <remarks>   crossbac, 7/2/2013. </remarks>
///
/// <param name="pDep"> [in,out] If non-null, the dependence to query. </param>
///
/// <returns>   NOTE(review): presumably true if the dependence is still outstanding and
///             false if it has resolved; the previous "true if it succeeds" text was
///             generated boilerplate -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
NonblockingQueryOutstanding(
__inout AsyncDependence * pDep
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Synchronous wait for an outstanding async operation. Does not acquire the
///             locks that would be required to update async and device context state in
///             response to a successful query or wait. </summary>
///
/// <remarks>   crossbac, 7/2/2013. </remarks>
///
/// <param name="pDep"> [in,out] If non-null, the dependence to wait for. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
LocklessWaitOutstanding(
__inout AsyncDependence * pDep
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the type (dedicated purpose) of the asynchronous context. Takes no
///             arguments. </summary>
///
/// <remarks>   crossbac, 7/8/2013.
///             NOTE(review): presumably returns m_eAsyncContextType -- confirm. </remarks>
///
/// <returns>   The asynchronous context type. </returns>
///-------------------------------------------------------------------------------------------------
virtual ASYNCCONTEXTTYPE
GetAsyncContextType(
VOID
);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary>   Wait for dependence--synchronously. Because we may have to make backend
///             framework calls (e.g. to wait or check CUDA event states) we may require
///             a number of fairly coarse locks, including an accelerator lock. When calling
///             this from task dispatch context, the caller must acquire all locks up front
///             since there are lock ordering disciplines such as (Accelerator->Datablock) that
///             are there to prevent deadlock for concurrent tasks.
///
///             This version assumes (or rather only asserts) that accelerator locks are held
///             already, so it can be called from dispatch context: Task is a friend class
///             to enable this while minimizing the potential for abuse.
///
///             This is a building block for the public version, which first collects locks,
///             but which cannot be called from a dispatch context as a result.
///             </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <param name="pDependence">  [in] If non-null, the dependence to wait for. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
__SynchronousWaitLocksHeld(
__in AsyncDependence * pDependence
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Wait for a sync point--synchronously. This overload takes the sync point
///             directly rather than an AsyncDependence. Because we may have to make backend
///             framework calls (e.g. to wait or check CUDA event states) we may require
///             a number of fairly coarse locks, including an accelerator lock. When calling
///             this from task dispatch context, the caller must acquire all locks up front
///             since there are lock ordering disciplines such as (Accelerator->Datablock) that
///             are there to prevent deadlock for concurrent tasks.
///
///             This version assumes (or rather only asserts) that accelerator locks are held
///             already, so it can be called from dispatch context: Task is a friend class
///             to enable this while minimizing the potential for abuse.
///
///             This is a building block for the public version, which first collects locks,
///             but which cannot be called from a dispatch context as a result.
///             </summary>
///
/// <remarks>   crossbac, 6/25/2013. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point to wait for. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
__SynchronousWaitLocksHeld(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Sync points, once marked resolved, can never return to the outstanding state.
///             Consequently, if a lock-free check of the outstanding flag returns false, there is
///             no danger of a race. Conversely, checking if the state is unknown requires
///             accelerator and context locks which restrict concurrency and have lock ordering
///             disciplines that make it difficult to *always* have these locks when this check
///             is required. So a quick check without a lock that can avoid locks when they are
///             unnecessary is a handy tool.
///             </summary>
///
/// <remarks>   crossbac, 6/26/2013. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point to check. </param>
///
/// <returns>   true if the sync point is observed (without taking any lock) to be resolved,
///             false if not. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
__IsSyncPointResolvedNoLock(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   DEBUG instrumentation for analyzing the composition of outstanding dependences
///             on this async context: how many are flagged as resolved, how many are
///             *actually* resolved, and whether the queue is monotonic. Takes no arguments
///             and returns nothing.
///             </summary>
///
/// <remarks>   Crossbac, 3/31/2014. </remarks>
///-------------------------------------------------------------------------------------------------
void
AnalyzeOutstandingQueue(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Garbage collect the outstanding queue. Anything no longer outstanding
///             can be removed from the queue. The original version is very
///             conservative about how much it actually cleans up--it only checks flags
///             (and thus avoids back-end API calls to check event status), which is
///             good for performance until the number of outstanding deps piles up.
///             This version attempts to balance these effects by making API calls
///             if the number of outstanding deps goes beyond a threshold. The original
///             (conservative) version can be reinstated with the static member variable
///             s_bUseConservativeGC.
///             The threshold at which to start making API calls is controlled by
///             PTask::Runtime::[Get|Set]AsyncContextGCQueryThreshold().
///             </summary>
///
/// <remarks>   Crossbac, 3/31/2014. </remarks>
///-------------------------------------------------------------------------------------------------
void
GarbageCollectOutstandingQueue(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Garbage collect the outstanding queue. Anything no longer outstanding
///             can be removed from the queue. This (old, obsolete) version is very
///             conservative about how much it actually cleans up--it only checks flags
///             (and thus avoids back-end API calls to check event status), which is
///             good for performance until the number of outstanding deps piles up.
///             The new version attempts to balance these effects by making API calls
///             if the number of outstanding deps goes beyond a threshold. This old version
///             can be reinstated with the static member variable s_bUseConservativeGC.
///             </summary>
///
/// <remarks>   Crossbac, 5/25/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void
GarbageCollectOutstandingQueueConservatively(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Truncate queue. Only to be called when the context is known to be synchronized!
///             Marks all outstanding sync points as resolved, and removes them from the
///             outstanding queue.
///             </summary>
///
/// <remarks>   Crossbac, 5/25/2012. </remarks>
///
/// <param name="bContextSynchronized"> [in] NOTE(review): presumably TRUE when the caller
///                                     knows the context has been synchronized (the
///                                     precondition stated above); how FALSE is handled is
///                                     not visible here -- confirm against the
///                                     implementation. </param>
///-------------------------------------------------------------------------------------------------
void
TruncateOutstandingQueue(
__in BOOL bContextSynchronized
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Truncate the outstanding queue relative to a given sync point. Only to be
///             called when the relevant sync points are known to be resolved!
///             Marks outstanding sync points as resolved, and removes them from the
///             outstanding queue.
///             NOTE(review): the previous text was copied verbatim from
///             TruncateOutstandingQueue and did not mention the parameter. Which side of
///             pSyncPoint is truncated, and whether pSyncPoint itself is included, is not
///             visible here -- confirm against the implementation.
///             </summary>
///
/// <remarks>   Crossbac, 5/25/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point that bounds the
///                             truncation. </param>
///-------------------------------------------------------------------------------------------------
void TruncateOutstandingQueueFrom(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Wait for dependence asynchronously by inserting a dependence
///             in the current context (stream) on the event in the sync point.
///             </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point whose event the current
///                             context should wait on. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
InsertFence(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Platform specific create synchronization point. Pure virtual: every
///             concrete subclass must supply an implementation. </summary>
///
/// <remarks>   Crossbac, 5/25/2012. </remarks>
///
/// <param name="pPSSyncObject">    Untyped pointer to a platform-specific sync object.
///                                 NOTE(review): presumably a backend-native
///                                 event/fence object that the new sync point wraps;
///                                 whether null is permitted is not visible here --
///                                 confirm per subclass. </param>
///
/// <returns>   null if it fails, else the newly created sync point. </returns>
///-------------------------------------------------------------------------------------------------
virtual SyncPoint *
PlatformSpecificCreateSyncPoint(
void * pPSSyncObject
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Platform specific destroy synchronization point. Pure virtual: every
///             concrete subclass must supply an implementation. </summary>
///
/// <remarks>   Crossbac, 5/25/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point to destroy. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificDestroySynchronizationPoint(
__in SyncPoint * pSyncPoint
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Wait for dependence asynchronously. Platform-specific part of fence
///             insertion; pure virtual, so every concrete subclass must supply an
///             implementation. </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point to wait on. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificInsertFence(
__in SyncPoint * pSyncPoint
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Wait for dependence synchronously. Platform-specific; pure virtual, so
///             every concrete subclass must supply an implementation. </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point to wait for. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronousWait(
__in SyncPoint * pSyncPoint
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Wait for dependence synchronously without locking the async context
///             or underlying accelerator: this simplifies lock acquisition for such
///             waits, but at the expense of leaving live dependences that are
///             actually resolved. Pure virtual: every concrete subclass must supply
///             an implementation. </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point to wait for. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificLocklessSynchronousWait(
__in SyncPoint * pSyncPoint
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Platform-specific synchronization of this context. Pure virtual: every
///             concrete subclass must supply an implementation. Takes no arguments.
///             NOTE(review): the previous summary ("Determines if we can platform specific
///             synchronize context") was garbled generated text. Presumably this blocks
///             until all work queued on the context has completed -- confirm per subclass.
///             </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronizeContext(
VOID
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Determines if the sync point is resolved (and marks it if so). Pure
///             virtual: every concrete subclass must supply an implementation. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <param name="pSyncPoint">   [in,out] If non-null, the sync point to query; it may be
///                             marked resolved as a side effect. </param>
///
/// <returns>   NOTE(review): the method name suggests true means "still outstanding",
///             while the summary is phrased in terms of "resolved"; the previous
///             "true if it succeeds" text was generated boilerplate -- confirm the
///             polarity against the implementations. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificQueryOutstanding(
__inout SyncPoint * pSyncPoint
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Platform specific nonblocking check whether the event remains outstanding.
///             Pure virtual: every concrete subclass must supply an implementation.
///             </summary>
///
/// <remarks>   crossbac, 7/2/2013. </remarks>
///
/// <param name="pSyncPoint">   [in,out] If non-null, the sync point to query. </param>
///
/// <returns>   NOTE(review): presumably true if the event is still outstanding and false
///             if it has resolved; the previous "true if it succeeds" text was generated
///             boilerplate -- confirm against the implementations. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificNonblockingQueryOutstanding(
__inout SyncPoint * pSyncPoint
)=0;
/// <summary>   The "outstanding queue" referred to by the garbage-collect and truncate
///             methods: sync points tracked by this context. </summary>
std::deque<SyncPoint*> m_qOutstanding;
/// <summary>   The accelerator serving as the device context (NOTE(review): presumably
///             what GetDeviceContext() returns -- confirm). </summary>
Accelerator * m_pDeviceContext;
/// <summary>   The task context (NOTE(review): presumably what GetTaskContext() returns;
///             may be null -- confirm). </summary>
Task * m_pTaskContext;
/// <summary>   The type (dedicated purpose) of this asynchronous context. </summary>
ASYNCCONTEXTTYPE m_eAsyncContextType;
/// <summary>   Static switch that reinstates the old, conservative (flag-check-only)
///             garbage collection of the outstanding queue. </summary>
static BOOL s_bUseConservativeGC;
// Friend classes get access to the protected members above. Task is a friend so that
// it can call __SynchronousWaitLocksHeld from dispatch context.
friend class AsyncDependence;
friend class SyncPoint;
friend class Task;
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets a string describing this refcount object. Allows subclasses to
///             provide overrides that make leaks easier to find when detected by the
///             rc profiler.
///             </summary>
///
/// <remarks>   Crossbac, 7/9/2013. </remarks>
///
/// <returns>   The refcount (RC) profile descriptor string. Returned by value, so it is
///             never null. </returns>
///-------------------------------------------------------------------------------------------------
virtual std::string GetRCProfileDescriptor();
};
};
#endif

Просмотреть файл

@ -1,180 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: AsyncDependence.h
//
// summary: Declares the asynchronous dependence class
///-------------------------------------------------------------------------------------------------
#ifndef __ASYNC_DEPENDENCE_H__
#define __ASYNC_DEPENDENCE_H__
#include "ReferenceCounted.h"
namespace PTask {
class SyncPoint;
class AsyncContext;
class PBuffer;
///-------------------------------------------------------------------------------------------------
/// <summary>   A reference-counted dependence on a sync point within an asynchronous context,
///             tagged with the type of asynchronous operation it represents. Provides
///             blocking, lockless and non-blocking ways to wait for or query the
///             underlying sync point.
///             </summary>
///-------------------------------------------------------------------------------------------------
class AsyncDependence : public ReferenceCounted {
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Constructor. </summary>
///
/// <remarks>   crossbac, 5/24/2012. </remarks>
///
/// <param name="pAsyncContext">    [in] If non-null, context for the outstanding
///                                 asynchronous operations. </param>
/// <param name="pSyncPoint">       [in] If non-null, the sync point on which to depend. </param>
/// <param name="eOperationType">   [in] Type of the operation. </param>
///-------------------------------------------------------------------------------------------------
AsyncDependence(
__in AsyncContext * pAsyncContext,
__in SyncPoint * pSyncPoint,
__in ASYNCHRONOUS_OPTYPE eOperationType
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destructor. </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~AsyncDependence();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the asynchronous context this dependence belongs to. </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <returns>   null if there is none, else the context. </returns>
///-------------------------------------------------------------------------------------------------
virtual AsyncContext * GetContext();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the platform context object. </summary>
///
/// <remarks>   Crossbac, 5/25/2012.
///             NOTE(review): presumably forwards to the owning AsyncContext's
///             GetPlatformContextObject() -- confirm against the implementation. </remarks>
///
/// <returns>   null if it fails, else the platform context object (untyped). </returns>
///-------------------------------------------------------------------------------------------------
void * GetPlatformContextObject();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the platform wait object. </summary>
///
/// <remarks>   Crossbac, 5/25/2012.
///             NOTE(review): presumably the backend-native event/fence object held by the
///             sync point -- confirm against the implementation. </remarks>
///
/// <returns>   null if it fails, else the platform wait object (untyped). </returns>
///-------------------------------------------------------------------------------------------------
void * GetPlatformWaitObject();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the sync point this dependence encapsulates. </summary>
///
/// <remarks>   Crossbac, 5/25/2012. </remarks>
///
/// <returns>   null if there is none, else the sync point. </returns>
///-------------------------------------------------------------------------------------------------
SyncPoint * GetSyncPoint();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets operation type. </summary>
///
/// <remarks>   crossbac, 5/1/2013. </remarks>
///
/// <returns>   The operation type. </returns>
///-------------------------------------------------------------------------------------------------
ASYNCHRONOUS_OPTYPE GetOperationType();
///-------------------------------------------------------------------------------------------------
/// <summary>   Query if this dependence is outstanding. </summary>
///
/// <remarks>   crossbac, 6/25/2013. </remarks>
///
/// <returns>   true if outstanding, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL IsOutstanding();
///-------------------------------------------------------------------------------------------------
/// <summary>   Blocking wait until the dependence is complete. </summary>
///
/// <remarks>   crossbac, 6/25/2013.
///             NOTE(review): the name suggests this variant acquires the required locks
///             itself (in contrast to __SynchronousWaitLocksHeld) -- confirm. </remarks>
///
/// <returns>   NOTE(review): not documented originally; presumably true if the wait
///             succeeds, false if it fails -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
BOOL SynchronousExclusiveWait();
///-------------------------------------------------------------------------------------------------
/// <summary>   Lockless wait outstanding: without acquiring any locks, attempt to perform a
///             synchronous wait for any outstanding async dependences on this buffer that
///             conflict with an operation of the given type. This is an experimental API
///             (enable/disable with PTask::Runtime::*etTaskDispatchLocklessIncomingDepWait(),
///             i.e. the Get/Set pair). It attempts to leverage the fact that the CUDA APIs
///             for waiting on events appear to be thread-safe and decoupled from a
///             particular device context, in order to minimize serialization associated
///             with outstanding dependences on data consumed by tasks that do not require
///             accelerators for any other reason than to wait for such operations to
///             complete.
///             NOTE(review): "on this buffer" and "of the given type" look carried over
///             from a buffer-level API; this method takes no arguments and presumably
///             waits on this dependence only -- confirm.
///             </summary>
///
/// <remarks>   crossbac, 7/1/2013. </remarks>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL LocklessWaitOutstanding();
///-------------------------------------------------------------------------------------------------
/// <summary>   Determines if the dependence is outstanding without acquiring the device
///             and context locks that would be required to react to resolution if we
///             detect it.
///             </summary>
///
/// <remarks>   crossbac, 7/2/2013. </remarks>
///
/// <returns>   NOTE(review): presumably true if the dependence is still outstanding and
///             false if it has resolved; the previous "true if it succeeds" text was
///             generated boilerplate -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
BOOL NonblockingQueryOutstanding();
///-------------------------------------------------------------------------------------------------
/// <summary>   Determines if the sync point this dependence encapsulates has been
///             marked resolved or not. The transition from outstanding to resolved
///             is monotonic, so we can make this check without a lock, provided
///             that only a FALSE return value is considered actionable.
///             </summary>
///
/// <remarks>   crossbac, 7/2/2013. </remarks>
///
/// <returns>   NOTE(review): presumably true if the flag still says "outstanding" and
///             false if the sync point has been marked resolved. That reading is
///             consistent with only FALSE being actionable, but the previous text was
///             generated boilerplate -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
BOOL QueryOutstandingFlag();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary>   Blocking wait until complete--locks already held. </summary>
///
/// <remarks>   crossbac, 6/25/2013. </remarks>
///
/// <returns>   NOTE(review): not documented originally; presumably true if the wait
///             succeeds, false if it fails -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
BOOL __SynchronousWaitLocksHeld();
/// <summary>   Context for the outstanding asynchronous operation (constructor argument). </summary>
AsyncContext * m_pAsyncContext;
/// <summary>   The sync point on which this dependence depends (constructor argument). </summary>
SyncPoint * m_pSyncPoint;
/// <summary>   Type of the asynchronous operation (constructor argument). </summary>
ASYNCHRONOUS_OPTYPE m_eOperationType;
// PBuffer is granted access to the protected members above.
friend class PBuffer;
};
};
#endif

Просмотреть файл

@ -1,556 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: BlockPool.h
//
// summary: Declares the block pool class
///-------------------------------------------------------------------------------------------------
#ifndef __BLOCK_POOL_H__
#define __BLOCK_POOL_H__
#include <stdio.h>
#include <crtdbg.h>
#include "datablocktemplate.h"
#include "channel.h"
#include "port.h"
#include "PBuffer.h"
#include <deque>
#include <vector>
#include "BlockPoolOwner.h"
namespace PTask {
class BlockPool : public Lockable {
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Constructor. </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///
/// <param name="pTemplate">    [in] If non-null, the datablock template (NOTE(review):
///                             presumably describes the blocks this pool holds --
///                             confirm). </param>
/// <param name="ePermissions"> [in] The buffer access flags (NOTE(review): presumably
///                             applied to blocks allocated by the pool -- confirm). </param>
/// <param name="uiPoolSize">   [in] Size of the pool. </param>
/// <param name="pPoolOwner">   [in] If non-null, the owner of this pool. </param>
///-------------------------------------------------------------------------------------------------
BlockPool(
__in DatablockTemplate * pTemplate,
__in BUFFERACCESSFLAGS ePermissions,
__in UINT uiPoolSize,
__in BlockPoolOwner * pPoolOwner
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destructor. Virtual, so the pool can be destroyed through a base-class
///             pointer. </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~BlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets whether the pool requests page-locked buffers. </summary>
///
/// <remarks>   crossbac, 4/29/2013.
///             NOTE(review): presumably this concerns page-locking of host-side views of
///             pooled blocks (cf. bPageLockHostViews on ForceBlockPoolHint) -- confirm. </remarks>
///
/// <param name="bPageLocked">  true to request page-locked buffers, false otherwise. </param>
///-------------------------------------------------------------------------------------------------
void SetRequestsPageLocked(BOOL bPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets whether the pool requests page-locked buffers. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   NOTE(review): presumably the value last given to SetRequestsPageLocked()
///             (true if page-locked buffers are requested); the previous "true if it
///             succeeds" text was generated boilerplate -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
BOOL GetRequestsPageLocked();
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets whether this pool is growable. </summary>
///
/// <remarks>   crossbac, 4/29/2013.
///             NOTE(review): presumably a growable pool may allocate blocks beyond its
///             initial size -- confirm against the implementation. </remarks>
///
/// <param name="bGrowable">    true if growable. </param>
///-------------------------------------------------------------------------------------------------
void SetGrowable(BOOL bGrowable);
///-------------------------------------------------------------------------------------------------
/// <summary>   Query if this pool is growable. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   true if growable, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL IsGrowable();
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets whether the pool eagerly materializes device-side views. </summary>
///
/// <remarks>   crossbac, 4/29/2013.
///             NOTE(review): presumably "eager" means device buffers for pooled blocks are
///             created at pool allocation time rather than on first use -- confirm. </remarks>
///
/// <param name="bEager">   true to materialize eagerly, false otherwise. </param>
///-------------------------------------------------------------------------------------------------
void SetEagerDeviceMaterialize(BOOL bEager);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the eager device materialize setting. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   NOTE(review): presumably the value last given to
///             SetEagerDeviceMaterialize() (true if eager); the previous "true if it
///             succeeds" text was generated boilerplate -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
BOOL GetEagerDeviceMaterialize();
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets the pool size. </summary>
///
/// <remarks>   crossbac, 4/29/2013.
///             NOTE(review): presumably counted in blocks, not bytes -- confirm. </remarks>
///
/// <param name="uiSize">   The new pool size. </param>
///-------------------------------------------------------------------------------------------------
void SetPoolSize(UINT uiSize);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the pool size. </summary>
///
/// <remarks>   crossbac, 4/29/2013.
///             NOTE(review): presumably counted in blocks, not bytes -- confirm. </remarks>
///
/// <returns>   The pool size. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary>   Adds a view memory space, identified by accelerator. </summary>
///
/// <remarks>   crossbac, 4/29/2013.
///             NOTE(review): presumably records that pooled blocks may need a view in this
///             accelerator's memory space -- confirm. </remarks>
///
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
///-------------------------------------------------------------------------------------------------
void AddViewMemorySpace(Accelerator* pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary>   Adds a view memory space, identified by memory space number. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <param name="uiMemorySpace">    The memory space (NOTE(review): presumably a memory
///                                 space identifier/index -- confirm the valid range). </param>
///-------------------------------------------------------------------------------------------------
void AddViewMemorySpace(UINT uiMemorySpace);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets a destination buffer for a block with an upstream
///             allocator. Succeeds only if the pool happens to have blocks
///             backed by sufficient resources in all channels that are backed.
///             </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///
/// <param name="pAccelerator">     (optional) [in] If non-null, the accelerator.
///                                 Defaults to NULL. </param>
/// <param name="uiDataBytes">      (optional) [in] Defaults to 0. NOTE(review): presumably
///                                 the required size in bytes of the data channel --
///                                 confirm. </param>
/// <param name="uiMetaBytes">      (optional) [in] Defaults to 0. NOTE(review): presumably
///                                 the required size in bytes of the metadata channel --
///                                 confirm. </param>
/// <param name="uiTemplateBytes">  (optional) [in] Defaults to 0. NOTE(review): presumably
///                                 the required size in bytes of the template channel --
///                                 confirm. </param>
///
/// <returns>   null if it fails, else the destination buffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock *
GetPooledBlock(
__in Accelerator * pAccelerator=NULL,
__in UINT uiDataBytes=0,
__in UINT uiMetaBytes=0,
__in UINT uiTemplateBytes=0
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Adds a new block to the pool. </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///
/// <param name="pBlock">   [in,out] If non-null, the block to add. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddNewBlock(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary>   Returns a block to the pool. </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///
/// <param name="pBlock">   [in,out] If non-null, the block being returned. </param>
///-------------------------------------------------------------------------------------------------
virtual void ReturnBlock(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary>   Query if this object has a block pool. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   true if it has a block pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasBlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary>   Force block pooling for a port that has an up-stream allocator. In general, when
///             we have an upstream allocator (meta) port, the runtime will not create a block
///             pool for the corresponding output port. This turns out to put device-side
///             allocation on the critical path in some cases, so we provide a way to override
///             that behavior and allow a port to create a pool based on some size hints. When
///             there is a block available with sufficient space in the pool, the meta port can
///             avoid the allocation and draw from the pool.
///             </summary>
///
/// <remarks>   Crossbac, 9/25/2012. </remarks>
///
/// <param name="nPoolSize">            Size of the block pool. </param>
/// <param name="nStride">              The stride. </param>
/// <param name="nDataBytes">           The data in bytes. </param>
/// <param name="nMetaBytes">           The meta in bytes. </param>
/// <param name="nTemplateBytes">       The template in bytes. </param>
/// <param name="bPageLockHostViews">   (optional) whether to page-lock host views.
///                                     Defaults to FALSE. </param>
/// <param name="bEagerMaterialize">    (optional) whether to materialize eagerly.
///                                     Defaults to FALSE. </param>
///-------------------------------------------------------------------------------------------------
virtual void
ForceBlockPoolHint(
__in UINT nPoolSize,
__in UINT nStride,
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in BOOL bPageLockHostViews=FALSE,
__in BOOL bEagerMaterialize=FALSE
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate block pool. Attempt to preallocate blocks on this port.
///             Synchronous version: allocates buffers and populates any device-side
///             views in one go. If graph construction performance matters, this is
///             not a good way to do it, since memory allocation causes synchronization.
///             The asynchronous variant does it in several passes, allowing us
///             to overlap the copy.
///
///             Allocation of data-blocks and platform-specific buffers can be a significant
///             latency expense at dispatch time. We can actually preallocate output datablocks
///             and create device-side buffers at graph construction time. For each node in the
///             graph, allocate data blocks on any output ports, and create device-specific
///             buffers for all accelerators capable of executing the node.
///             </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators">    [in] If non-null, the accelerators on which views of blocks
///                                 allocated in the pool may be required. </param>
/// <param name="uiPoolSize">       [in] (optional) Size of the pool. If zero/defaulted,
///                                 Runtime::GetICBlockPoolSize() will be used to determine the
///                                 size of the pool. </param>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
///             </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPool(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destroys the block pool. AddRef everything in the pool, set its owner
///             to null, and then release it. </summary>
///
/// <remarks>   crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void
DestroyBlockPool(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate block pool. Attempt to preallocate blocks on this port.
///             Asynchronous version. Only allocates device-space buffers
///             in the first pass. Second pass queues all the copies.
///             This function handles only the first pass; the second pass is
///             handled by FinalizeBlockPoolAsync.
///             </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators">    [in] If non-null, the accelerators on which views of blocks
///                                 allocated in the pool may be required. </param>
/// <param name="uiPoolSize">       [in] (optional) Size of the pool. If zero/defaulted,
///                                 Runtime::GetICBlockPoolSize() will be used to determine the
///                                 size of the pool. </param>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
///             </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPoolAsync(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Finalize asynchronous block pool allocation.
///             The asynchronous variant only allocates device-space buffers
///             in the first pass (see AllocateBlockPoolAsync). Second pass queues
///             all the copies. This function handles the second pass and takes
///             no parameters.
///             </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
///             </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
FinalizeBlockPoolAsync(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Queries whether this object is enabled. </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///
/// <returns>   true if enabled, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL IsEnabled();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the block count high water mark. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The high water mark. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetHighWaterMark();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the block count low water mark. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The low water mark. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetLowWaterMark();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the currently available block count. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The number of blocks currently available. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetAvailableBlockCount();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the total number of blocks owned by the pool. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The total number of blocks owned by the pool. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetOwnedBlockCount();
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets the number of blocks by which the pool should grow if
///             it grows in response to dynamic demand.
///             </summary>
///
/// <remarks>   crossbac, 6/20/2013. </remarks>
///
/// <param name="uiBlockCount"> Number of blocks to add each time the pool grows. </param>
///-------------------------------------------------------------------------------------------------
void SetGrowIncrement(UINT uiBlockCount);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the grow increment. </summary>
///
/// <returns>   The grow increment, in blocks. Presumably the value most recently passed to
///             SetGrowIncrement; confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetGrowIncrement();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate a block based on the hint size (rather than the template!).
///             We do not support an async variant of this yet.
///             </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///
/// <param name="uiDataBytes">      Size of the data channel, in bytes. </param>
/// <param name="uiMetaBytes">      Size of the meta channel, in bytes. </param>
/// <param name="uiTemplateBytes">  Size of the template channel, in bytes. </param>
///
/// <returns>   null if it fails, else the new block. </returns>
///-------------------------------------------------------------------------------------------------
Datablock *
AllocateBlockWithPoolHint(
__in UINT uiDataBytes,
__in UINT uiMetaBytes,
__in UINT uiTemplateBytes
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate a block for the pool. Takes no parameters. </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///
/// <returns>   null if it fails, else the new block. </returns>
///-------------------------------------------------------------------------------------------------
Datablock *
AllocateBlockForPool(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate a block as part of asynchronous pool construction.
///             </summary>
///
/// <remarks>   crossbac, 4/30/2013. </remarks>
///
/// <param name="bFinalized">   [out] Finalization flag written by this call. Presumably set to
///                             indicate whether the returned block still needs a FinalizeBlock
///                             pass; confirm against the implementation. </param>
///
/// <returns>   null if it fails, else the new block. </returns>
///-------------------------------------------------------------------------------------------------
Datablock *
AllocateBlockForPoolAsync(
__out BOOL &bFinalized
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Finalize a block allocated with the async variant. Basically
///             we need to populate any views on this pass.
///             </summary>
///
/// <remarks>   crossbac, 4/30/2013. </remarks>
///
/// <param name="pBlock">   [in] If non-null, the block to finalize. </param>
///-------------------------------------------------------------------------------------------------
void FinalizeBlock(
__in Datablock * pBlock
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Grows the pool by the given number of blocks. </summary>
///
/// <remarks>   crossbac, 6/20/2013. </remarks>
///
/// <param name="uiBlockCount"> Number of blocks to add to the pool. </param>
///-------------------------------------------------------------------------------------------------
void Grow(UINT uiBlockCount);
/// <summary>   The datablock template associated with this pool. </summary>
DatablockTemplate * m_pTemplate;
/// <summary>   Maximum size of the block pool. </summary>
int m_nMaxPoolSize;
/// <summary>   The block pool: the queue of pooled datablocks. </summary>
std::deque<Datablock*> m_pBlockPool;
/// <summary>   True if we have provided hints for block pool management
///             that are not present in the template.
///             </summary>
BOOL m_bPoolHintsSet;
/// <summary>   If the m_bPoolHintsSet member is true, this member
///             controls the size of the block pool.
///             </summary>
UINT m_nPoolHintPoolSize;
/// <summary>   If the m_bPoolHintsSet member is true, this member
///             controls the stride of the block pool.
///             </summary>
UINT m_nPoolHintStride;
/// <summary>   If the m_bPoolHintsSet member is true, this member
///             controls the data channel size of the block pool.
///             </summary>
UINT m_nPoolHintDataBytes;
/// <summary>   If the m_bPoolHintsSet member is true, this member
///             controls the meta channel size of the block pool.
///             </summary>
UINT m_nPoolHintMetaBytes;
/// <summary>   If the m_bPoolHintsSet member is true, this member
///             controls the template channel size of the block pool.
///             </summary>
UINT m_nPoolHintTemplateBytes;
/// <summary>   True if host buffers for datablocks in this pool
///             should be allocated from page-locked memory.
///             </summary>
BOOL m_bPageLockHostViews;
/// <summary>   True to eagerly materialize device views. NOTE(review): exact meaning
///             is not visible in this header; confirm against the implementation.
///             </summary>
BOOL m_bEagerDeviceMaterialize;
/// <summary>   The memory spaces in which these blocks can reasonably
///             require a view. </summary>
std::set<Accelerator*> m_vAccelerators;
/// <summary>   The permissions for blocks in this pool. </summary>
BUFFERACCESSFLAGS m_ePermissions;
/// <summary>   True if the pool is growable. </summary>
BOOL m_bGrowable;
/// <summary>   True if the pool has an initial value (see m_vInitialValue). </summary>
BOOL m_bHasInitialValue;
/// <summary>   The initial value (see m_bHasInitialValue). </summary>
HOSTMEMORYEXTENT m_vInitialValue;
/// <summary>   The owner of the pool. </summary>
BlockPoolOwner * m_pPoolOwner;
/// <summary>   Blocks allocated with the async variant that require finalization. </summary>
std::vector<Datablock*> m_vOutstandingBlocks;
/// <summary>   The set of dirty datablocks. NOTE(review): what makes a block "dirty"
///             is not visible in this header; confirm against the implementation.
///             </summary>
std::set<Datablock*> m_vDirty;
/// <summary>   The block count high water mark. </summary>
UINT m_uiHighWaterMark;
/// <summary>   The block count low water mark. </summary>
UINT m_uiLowWaterMark;
/// <summary>   The number of owned blocks. </summary>
UINT m_uiOwnedBlocks;
/// <summary>   The grow increment, in blocks (see SetGrowIncrement). </summary>
UINT m_uiGrowIncrement;
/// <summary>   True if enabled, false if disabled (see IsEnabled). </summary>
BOOL m_bEnabled;
/// <summary>   Presumably queries whether the given block is currently held by this pool.
///             NOTE(review): semantics are not visible in this header; confirm against
///             the implementation. </summary>
BOOL Contains(Datablock * pBlock);
/// <summary>   Releases blocks held by the pool. NOTE(review): exact scope (all blocks vs.
///             queued blocks only) is not visible in this header; confirm. </summary>
void ReleaseBlocks();
/// <summary>   Presumably locks the accelerators in m_vAccelerators; confirm against
///             the implementation. </summary>
void LockTargetAccelerators();
/// <summary>   Presumably the counterpart of LockTargetAccelerators; confirm against
///             the implementation. </summary>
void UnlockTargetAccelerators();
/// <summary>   Port is granted access to the non-public members of this class. </summary>
friend class Port;
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Checks that the block pool contains only datablocks with no control signals. </summary>
///
/// <remarks>   Crossbac, 3/2/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void CheckBlockPoolStates();
};
};
#endif

Просмотреть файл

@ -1,445 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: BlockPoolOwner.h
//
// summary: Declares the block pool owner class
///-------------------------------------------------------------------------------------------------
#ifndef __BLOCK_POOL_OWNER_H__
#define __BLOCK_POOL_OWNER_H__
#include <stdio.h>
#include <crtdbg.h>
#include <deque>
#include <vector>
#include <map>
namespace PTask {
class Graph;
class Datablock;
class DatablockTemplate;
// Accelerator is referenced only by pointer in the signatures below; forward-declare
// it here so this header does not depend on the includer having declared it first.
class Accelerator;
///-------------------------------------------------------------------------------------------------
/// <summary>   Interface implemented by objects that own a pool of datablocks, plus a
///             global registry of the block pool owners that are currently active.
///
///             Because PTask objects are reference counted, it is difficult to enforce
///             life-cycle relationships that appear to be implied by member containment.
///             For block pools, it is entirely possible that user code (or internal code)
///             keeps a reference to a datablock after the block pool from which it came is
///             destroyed or deleted. Consequently, the block pool owner pointer is not
///             guaranteed to be valid when a block is released, and we must keep a global
///             list of what block pool objects are actually valid and active to avoid
///             attempting to return a block to a pool that has been deleted. The static
///             members of this class maintain that list.
///             </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
class BlockPoolOwner {
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Destructor. Virtual because this is a polymorphic interface: without it,
///             deleting an implementation through a BlockPoolOwner pointer would be
///             undefined behavior. </summary>
///-------------------------------------------------------------------------------------------------
virtual ~BlockPoolOwner() {}
///-------------------------------------------------------------------------------------------------
/// <summary>   Initializes the block pool manager. This method creates the data structures
///             used to track which block pool owners are valid and active (see the
///             class summary for why that tracking is needed).
///             </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void
InitializeBlockPoolManager(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destroys the block pool manager. This method cleans up the data structures
///             used to track which block pool owners are valid and active (see the
///             class summary for why that tracking is needed).
///             </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void
DestroyBlockPoolManager(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Is a block pool owner pointer valid? A block may outlive the pool it came
///             from, so the owner pointer must be checked against the global list of
///             active owners before a block is returned to it (see the class summary).
///             </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///
/// <param name="pOwner">   [in] If non-null, the owner to check. </param>
///
/// <returns>   true if the pool owner is active, false if not. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
IsPoolOwnerActive(
__in BlockPoolOwner * pOwner
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Adds a new block pool owner to the global list of active owners
///             (see the class summary for why that list is needed).
///             </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///
/// <param name="pGraph">   [in] If non-null, the graph. </param>
/// <param name="pOwner">   [in] If non-null, the owner to register. </param>
///-------------------------------------------------------------------------------------------------
static void
RegisterActivePoolOwner(
__in Graph * pGraph,
__in BlockPoolOwner * pOwner
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Retires a block pool owner from the global list of active owners
///             (see the class summary for why that list is needed).
///             </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///
/// <param name="pOwner">   [in] If non-null, the owner to retire. </param>
///-------------------------------------------------------------------------------------------------
static void
RetirePoolOwner(
__in BlockPoolOwner * pOwner
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Retires all block pool owners from the given graph
///             (see the class summary for why owners are tracked).
///             </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///
/// <param name="pGraph">   [in] If non-null, the graph whose pool owners are retired. </param>
///-------------------------------------------------------------------------------------------------
static void
RetireGraph(
__in Graph * pGraph
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Adds a new block to the pool. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock">   [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddNewBlock(Datablock * pBlock)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Returns a block to the pool. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock">   [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void ReturnToPool(Datablock * pBlock)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Queries whether the owned pool is a global pool. </summary>
///
/// <remarks>   crossbac, 8/30/2013. </remarks>
///
/// <returns>   true if global pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL BlockPoolIsGlobal()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate block pool. Attempt to preallocate blocks on this port.
///
///             Allocation of data-blocks and platform-specific buffers can be a significant
///             latency expense at dispatch time. We can actually preallocate output datablocks
///             and create device-side buffers at graph construction time. For each node in the
///             graph, allocate data blocks on any output ports, and create device-specific
///             buffers for all accelerators capable of executing the node.
///             </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators">    [in] If non-null, the accelerators on which views of blocks
///                                 allocated in the pool may be required. </param>
/// <param name="uiPoolSize">       [in] (optional) Size of the pool. If zero/defaulted,
///                                 Runtime::GetICBlockPoolSize() will be used to determine the
///                                 size of the pool. </param>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
///             </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPool(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate block pool. Attempt to preallocate blocks on this port.
///             Asynchronous version. Only allocates device-space buffers
///             in the first pass. Second pass queues all the copies.
///             This function handles only the first pass; the second pass is
///             handled by FinalizeBlockPoolAsync.
///             </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators">    [in] If non-null, the accelerators on which views of blocks
///                                 allocated in the pool may be required. </param>
/// <param name="uiPoolSize">       [in] (optional) Size of the pool. If zero/defaulted,
///                                 Runtime::GetICBlockPoolSize() will be used to determine the
///                                 size of the pool. </param>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
///             </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPoolAsync(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Finalize asynchronous block pool allocation.
///             The asynchronous variant only allocates device-space buffers
///             in the first pass (see AllocateBlockPoolAsync). Second pass queues
///             all the copies. This function handles the second pass and takes
///             no parameters.
///             </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
///             </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
FinalizeBlockPoolAsync(
VOID
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Destroys the block pool. AddRef everything in the pool, set its owner
///             to null, and then release it. </summary>
///
/// <remarks>   crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void
DestroyBlockPool(
VOID
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Queries whether this object has a block pool. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   true if there is a block pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasBlockPool()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Force block pooling for a port that has an up-stream allocator. In general, when
///             we have an upstream allocator (meta) port, the runtime will not create a block
///             pool for the corresponding output port. This turns out to put device-side
///             allocation on the critical path in some cases, so we provide a way to override
///             that behavior and allow a port to create a pool based on some size hints. When
///             there is a block available with sufficient space in the pool, the meta port can
///             avoid the allocation and draw from the pool.
///             </summary>
///
/// <remarks>   Crossbac, 9/25/2012. </remarks>
///
/// <param name="nPoolSize">                Size hint for the block pool. </param>
/// <param name="nStride">                  The stride hint. </param>
/// <param name="nDataBytes">               Size hint for the data channel, in bytes. </param>
/// <param name="nMetaBytes">               Size hint for the meta channel, in bytes. </param>
/// <param name="nTemplateBytes">           Size hint for the template channel, in bytes. </param>
/// <param name="bPageLockHostViews">       (optional) Page-lock setting for host views;
///                                         exact effect is implementation-defined. </param>
/// <param name="bEagerDeviceMaterialize">  (optional) Eager device materialization setting;
///                                         exact effect is implementation-defined. </param>
///-------------------------------------------------------------------------------------------------
virtual void
ForceBlockPoolHint(
__in UINT nPoolSize,
__in UINT nStride,
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in BOOL bPageLockHostViews=FALSE,
__in BOOL bEagerDeviceMaterialize=FALSE
)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the pool size. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   The pool size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
GetPoolSize()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets the "requests page locked" flag. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <param name="bPageLocked">  The new flag value. Presumably TRUE requests page-locked
///                             buffers for pooled blocks; confirm against implementations. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetRequestsPageLocked(BOOL bPageLocked)=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the "requests page locked" flag. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   The flag value. Presumably the value most recently passed to
///             SetRequestsPageLocked; confirm against implementations. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL GetRequestsPageLocked()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the owner name. </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///
/// <returns>   null if it fails, else the owner name. </returns>
///-------------------------------------------------------------------------------------------------
virtual char * GetPoolOwnerName()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Queries if a block pool is active and able to deliver/return blocks. </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///
/// <returns>   true if a block pool is active, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsBlockPoolActive()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the high water mark. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The high water mark. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetHighWaterMark()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the total number of blocks owned by the pool. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The total number of blocks owned by the pool (whether they are queued or not). </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetOwnedBlockCount()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the low water mark. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The low water mark. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetLowWaterMark()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the currently available block count. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The number of blocks currently available. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetAvailableBlockCount()=0;
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets a destination buffer for a block with an upstream
///             allocator. Succeeds only if the pool happens to have blocks
///             backed by sufficient resources in all channels that are backed.
///             </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///
/// <param name="pAccelerator">     (optional) [in] If non-null, the accelerator. </param>
/// <param name="uiDataBytes">      (optional) Data channel size, in bytes. Presumably the
///                                 minimum capacity a pooled block must have to satisfy the
///                                 request; confirm against implementations. </param>
/// <param name="uiMetaBytes">      (optional) Meta channel size, in bytes (same caveat). </param>
/// <param name="uiTemplateBytes">  (optional) Template channel size, in bytes (same caveat). </param>
///
/// <returns>   null if it fails, else the destination buffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock *
GetBlockFromPool(
__in Accelerator * pAccelerator=NULL,
__in UINT uiDataBytes=0,
__in UINT uiMetaBytes=0,
__in UINT uiTemplateBytes=0
)=0;
/// <summary>   The lock for the block pool owners. </summary>
static CRITICAL_SECTION s_csBlockPoolOwners;
/// <summary>   True if block pool owner management is initialized. </summary>
static LONG s_bPoolOwnersInit;
/// <summary>   The active pool owners, each mapped to a graph. </summary>
static std::map<BlockPoolOwner*, Graph*> s_vActivePoolOwners;
/// <summary>   The dead pool owners, each mapped to a graph. </summary>
static std::map<BlockPoolOwner*, Graph*> s_vDeadPoolOwners;
};
};
#endif

Просмотреть файл

@ -1,213 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: CLAsyncContext.h
//
// summary: Declares the OpenCL asynchronous context class
///-------------------------------------------------------------------------------------------------
#ifndef __CL_ASYNC_CONTEXT_H__
#define __CL_ASYNC_CONTEXT_H__
#ifdef OPENCL_SUPPORT
#include "primitive_types.h"
#include "accelerator.h"
#include "claccelerator.h"
#include "task.h"
#include "channel.h"
#include "hrperft.h"
#include "AsyncContext.h"
#include "AsyncDependence.h"
#include <map>
#include <vector>
#include <list>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> OpenCL asynchronous context. </summary>
///
/// <remarks> crossbac, 6/18/2012.
///
/// FIXME: TODO:
/// -------------------
/// OpenCL supports events and command queues such that we can implement fine grain
/// dependences exactly as they are implemented for the cuda backend. Currently there
/// just isn't enough demand for the OpenCL backend to justify prioritizing that
/// development effort. Hence, all OpenCL calls are currently synchronous, and the
/// platform-specific work of managing dependences and waiting for them to resolve can
/// be completely elided.
///
/// </remarks>
///-------------------------------------------------------------------------------------------------
class CLAsyncContext : public AsyncContext {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pDeviceContext"> [in,out] If non-null, context for the device. </param>
/// <param name="pTaskContext"> [in,out] If non-null, context for the task. </param>
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
///-------------------------------------------------------------------------------------------------
CLAsyncContext(
__in Accelerator * pDeviceContext,
__in Task * pTaskContext,
__in ASYNCCONTEXTTYPE eAsyncContextType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~CLAsyncContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes this object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Initialize();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific create synchronization point. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pPSSyncObject"> Opaque pointer. NOTE(review): presumably the
/// platform-specific sync object to wrap -- confirm
/// against the implementation. </param>
///
/// <returns> null if it fails, else the new synchronization point. </returns>
///-------------------------------------------------------------------------------------------------
virtual SyncPoint *
PlatformSpecificCreateSyncPoint(
void * pPSSyncObject
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific destroy synchronization point. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificDestroySynchronizationPoint(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific synchronization of this context. Takes no arguments.
/// NOTE(review): presumably blocks until outstanding work on the context
/// has completed -- confirm against the implementation. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronizeContext(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence asynchronously. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point for the
/// dependence. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificInsertFence(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence synchronously. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point for the
/// dependence. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronousWait(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Determines if the sync point is resolved (and marks it if so). </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point. </param>
///
/// <returns> true if it succeeds, false if it fails. NOTE(review): for a query this
/// presumably reports whether the sync point is still outstanding --
/// confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificQueryOutstanding(
__inout SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific nonblocking check whether the event remains outstanding. </summary>
///
/// <remarks> crossbac, 7/2/2013. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point. </param>
///
/// <returns> true if it succeeds, false if it fails. NOTE(review): for a query this
/// presumably reports whether the event is still outstanding -- confirm
/// against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificNonblockingQueryOutstanding(
__inout SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence synchronously without locking the async context
/// or underlying accelerator: this simplifies lock acquisition for such
/// waits, but at the expense of leaving live dependences that are
/// actually resolved. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point for the
/// dependence. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificLocklessSynchronousWait(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the platform context object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> null if it fails, else the platform context object. </returns>
///-------------------------------------------------------------------------------------------------
virtual void *
GetPlatformContextObject();
};
};
#endif
#endif

Просмотреть файл

@ -1,249 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: CUAsyncContext.h
//
// summary: Declares the CUDA asynchronous context class
///-------------------------------------------------------------------------------------------------
#ifndef __CUDA_ASYNC_CONTEXT_H__
#define __CUDA_ASYNC_CONTEXT_H__
#ifdef CUDA_SUPPORT
#include "primitive_types.h"
#include "accelerator.h"
#include "AsyncContext.h"
#include "cuhdr.h"
#include <map>
#include <vector>
#include <list>
namespace PTask {
class Task;
class SyncPoint;
class Accelerator;
class AsyncDependence;
///-------------------------------------------------------------------------------------------------
/// <summary> CUDA asynchronous context: an AsyncContext subclass that carries a CUstream,
/// two CUevent handles and a stream priority. </summary>
///
/// <remarks> Everything declared after the "protected:" label below -- including
/// GetStreamPriority/SetStreamPriority and the data members -- has protected
/// access, because no further access specifier appears in the class. </remarks>
///-------------------------------------------------------------------------------------------------
class CUAsyncContext : public AsyncContext {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pDeviceContext"> [in,out] If non-null, context for the device. </param>
/// <param name="pTaskContext"> [in,out] If non-null, context for the task. </param>
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
///-------------------------------------------------------------------------------------------------
CUAsyncContext(
__in Accelerator * pDeviceContext,
__in Task * pTaskContext,
__in ASYNCCONTEXTTYPE eAsyncContextType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~CUAsyncContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes this object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Initialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Notifies this context that the device has been synchronized. </summary>
///
/// <remarks> crossbac, 7/8/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void NotifyDeviceSynchronized();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific create synchronization point. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pPSSyncObject"> Opaque pointer. NOTE(review): presumably the
/// platform-specific sync object to wrap -- confirm
/// against the implementation. </param>
///
/// <returns> null if it fails, else the new synchronization point. </returns>
///-------------------------------------------------------------------------------------------------
virtual SyncPoint *
PlatformSpecificCreateSyncPoint(
void * pPSSyncObject
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific destroy synchronization point. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificDestroySynchronizationPoint(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific synchronization of this context. Takes no arguments.
/// NOTE(review): presumably blocks until outstanding work on the context
/// has completed -- confirm against the implementation. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronizeContext(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence asynchronously. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point for the
/// dependence. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificInsertFence(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence synchronously. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point for the
/// dependence. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronousWait(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Determines if the sync point is resolved (and marks it if so). </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point. </param>
///
/// <returns> true if it succeeds, false if it fails. NOTE(review): for a query this
/// presumably reports whether the sync point is still outstanding --
/// confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificQueryOutstanding(
__inout SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific nonblocking check whether the event remains outstanding. </summary>
///
/// <remarks> crossbac, 7/2/2013. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point. </param>
///
/// <returns> true if it succeeds, false if it fails. NOTE(review): for a query this
/// presumably reports whether the event is still outstanding -- confirm
/// against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificNonblockingQueryOutstanding(
__inout SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence synchronously without locking the async context
/// or underlying accelerator: this simplifies lock acquisition for such
/// waits, but at the expense of leaving live dependences that are
/// actually resolved. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronization point for the
/// dependence. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificLocklessSynchronousWait(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the platform context object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> null if it fails, else the platform context object. </returns>
///-------------------------------------------------------------------------------------------------
virtual void *
GetPlatformContextObject();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets stream priority. </summary>
///
/// <remarks> Crossbac, 3/20/2014. </remarks>
///
/// <returns> The stream priority. </returns>
///-------------------------------------------------------------------------------------------------
int GetStreamPriority();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets stream priority. </summary>
///
/// <remarks> Crossbac, 3/20/2014. </remarks>
///
/// <param name="nPriority"> The priority. </param>
///-------------------------------------------------------------------------------------------------
void SetStreamPriority(int nPriority);
/// <summary> The stream. </summary>
CUstream m_hStream;
/// <summary> The last fence. </summary>
CUevent m_hLastFence;
/// <summary> The event. </summary>
CUevent m_hEvent;
/// <summary> The stream priority. </summary>
int m_nStreamPriority;
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a string describing this refcount object. Allows subclasses to
/// provide overrides that make leaks easier to find when detected by the
/// rc profiler.
/// </summary>
///
/// <remarks> Crossbac, 7/9/2013. </remarks>
///
/// <returns> The refcount-profile descriptor string. </returns>
///-------------------------------------------------------------------------------------------------
virtual std::string GetRCProfileDescriptor();
};
};
#endif // CUDA_SUPPORT
#endif

Просмотреть файл

@ -1,181 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: ChannelProfiler.h
//
// summary: Declares the channel profiler class
///-------------------------------------------------------------------------------------------------
#ifndef __CHANNEL_PROFILER_H__
#define __CHANNEL_PROFILER_H__
#include "primitive_types.h"
#include <sstream>
namespace PTask {
class Channel;
typedef struct __channel_stats_t {
/// <summary> The block throughput limit. </summary>
UINT uiBlockTransitLimit;
/// <summary> The blocks delivered. </summary>
UINT uiBlocksDelivered;
/// <summary> The maximum occupancy. </summary>
UINT uiMaxOccupancy;
/// <summary> The cumulative occupancy. </summary>
UINT uiCumulativeOccupancy;
/// <summary> The occupancy samples. </summary>
UINT uiOccupancySamples;
/// <summary> The capacity. </summary>
UINT uiCapacity;
/// <summary> true if the channel is/was a pool owner. </summary>
BOOL bPoolOwner;
///-------------------------------------------------------------------------------------------------
/// <summary> Resets the stats object. </summary>
///
/// <remarks> Crossbac, 7/19/2013. </remarks>
///
/// <param name="parameter1"> The first parameter. </param>
///-------------------------------------------------------------------------------------------------
void Reset(
VOID
)
{
uiBlockTransitLimit = 0;
uiBlocksDelivered = 0;
uiMaxOccupancy = 0;
uiCumulativeOccupancy = 0;
uiOccupancySamples = 0;
uiCapacity = 0;
bPoolOwner = 0;
}
///-------------------------------------------------------------------------------------------------
/// <summary> Updates the stats object with a current snapshot of the channel state. </summary>
///
/// <remarks> Crossbac, 7/19/2013. </remarks>
///
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
///-------------------------------------------------------------------------------------------------
void Update(
Channel * pChannel
)
{
uiBlockTransitLimit = pChannel->GetBlockTransitLimit();
uiBlocksDelivered = pChannel->GetCumulativeBlockTransit();
uiMaxOccupancy = pChannel->GetMaxOccupancy();
uiCumulativeOccupancy = pChannel->GetCumulativeOccupancy();
uiOccupancySamples = pChannel->GetOccupancySamples();
uiCapacity = pChannel->GetCapacity();
bPoolOwner = pChannel->IsPoolOwner();
}
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 7/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
__channel_stats_t::__channel_stats_t(
VOID
)
{
Reset();
}
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 7/19/2013. </remarks>
///
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
///-------------------------------------------------------------------------------------------------
__channel_stats_t::__channel_stats_t(
Channel * pChannel
)
{
Update(pChannel);
}
} CHANNELSTATISTICS;
///-------------------------------------------------------------------------------------------------
/// <summary> Channel profiler. Each instance is bound to one Channel; the enable flags, the
/// lock and the statistics table are static and therefore shared by all
/// instances. </summary>
///
/// <remarks> NOTE(review): this class uses std::map, std::string and std::ostream, but the
/// header directly includes only primitive_types.h and sstream -- confirm that
/// map and string reach it transitively on every toolchain. </remarks>
///-------------------------------------------------------------------------------------------------
class ChannelProfiler {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 7/18/2013. </remarks>
///
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
///-------------------------------------------------------------------------------------------------
ChannelProfiler(Channel * pChannel);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 7/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~ChannelProfiler();
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the profiler (static: not tied to any instance). </summary>
///
/// <remarks> Crossbac, 7/18/2013. </remarks>
///
/// <param name="bEnable"> true to enable, false to disable. </param>
///-------------------------------------------------------------------------------------------------
static void Initialize(BOOL bEnable);
///-------------------------------------------------------------------------------------------------
/// <summary> De-initializes the profiler and frees any resources it is using (static). </summary>
///
/// <remarks> Crossbac, 7/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void Deinitialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Writes the profiler report to the given stream (static). The report contents
/// are defined by the implementation, which is not part of this header. </summary>
///
/// <remarks> Crossbac, 7/18/2013. </remarks>
///
/// <param name="ss"> [in,out] The output stream. </param>
///-------------------------------------------------------------------------------------------------
static void Report(std::ostream& ss);
///-------------------------------------------------------------------------------------------------
/// <summary> Merge instance statistics. NOTE(review): presumably folds this instance's
/// channel statistics into the shared static table -- confirm against the
/// implementation. </summary>
///
/// <remarks> Crossbac, 7/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void MergeInstanceStatistics();
protected:
/// <summary> The channel this profiler instance is bound to. </summary>
Channel * m_pChannel;
/// <summary> Shared flag. NOTE(review): presumably true when channel profiling is
/// enabled -- confirm against the implementation. </summary>
static BOOL m_bChannelProfile;
/// <summary> Shared flag. NOTE(review): presumably true once Initialize has run --
/// confirm against the implementation. </summary>
static BOOL m_bChannelProfileInit;
/// <summary> Shared critical section. NOTE(review): presumably guards m_vChannelStats --
/// confirm against the implementation. </summary>
static CRITICAL_SECTION m_csChannelStats;
/// <summary> Shared two-level table: string key to a heap-allocated map from string key to
/// CHANNELSTATISTICS pointer. NOTE(review): the meaning of the two key levels
/// is not visible in this header -- confirm against the implementation. </summary>
static std::map<std::string, std::map<std::string, CHANNELSTATISTICS*>*> m_vChannelStats;
};
};
#endif

Просмотреть файл

@ -1,998 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: CoherenceProfiler.h
//
// summary: Declares the coherence profiler class
///-------------------------------------------------------------------------------------------------
#ifndef __COHERENCE_PROFILER_H__
#define __COHERENCE_PROFILER_H__
#include "primitive_types.h"
#include <map>
#include <string>
#include <assert.h>
class CHighResolutionTimer;
namespace PTask {
class Port;
class Task;
class Datablock;
///-------------------------------------------------------------------------------------------------
/// <summary> Event types that can cause a coherence state transition. Values are
/// explicitly numbered 0 through 14. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///-------------------------------------------------------------------------------------------------
typedef enum COHERENCEEVENT_t {
/// <summary> The event causing the transition was not specified. </summary>
CET_UNSPECIFIED = 0,
/// <summary> The state transition was triggered by a binding to a task input. </summary>
CET_BIND_INPUT = 1,
/// <summary> The state transition was triggered by a binding to a task output. </summary>
CET_BIND_OUTPUT = 2,
/// <summary> The state transition was triggered by a binding to a task constant port. </summary>
CET_BIND_CONSTANT = 3,
/// <summary> The state transition was triggered by pushing into multiple consumer channels. </summary>
CET_PUSH_DOWNSTREAM_SHARE = 4,
/// <summary> The state transition was triggered by a user request for a pointer in host space. </summary>
CET_POINTER_REQUEST = 5,
/// <summary> The state transition was triggered by the deletion of the block. </summary>
CET_BLOCK_DELETE = 6,
/// <summary> The state transition was triggered by the cloning of the block. </summary>
CET_BLOCK_CLONE = 7,
/// <summary> The state transition was triggered by block allocation. </summary>
CET_BLOCK_CREATE = 8,
/// <summary> We are updating the host view of the block, but don't actually have
/// access to the information we need to figure out what action
/// triggered the view update. Most likely a user request.
/// </summary>
CET_HOST_VIEW_UPDATE = 9,
/// <summary> We are updating the device view of the block, but don't actually have
/// access to the information we need to figure out what action
/// triggered the view update. Most likely a user request.
/// </summary>
CET_ACCELERATOR_VIEW_UPDATE = 10,
/// <summary> Buffers are being allocated for this block. </summary>
CET_BUFFER_ALLOCATE = 11,
/// <summary> A request to grow the buffer caused some buffer reallocation and
/// potentially view updates as a side effect. </summary>
CET_GROW_BUFFER = 12,
/// <summary> A request to synthesize a metadata block caused the traffic. </summary>
CET_SYNTHESIZE_BLOCK = 13,
/// <summary> A pinned host buffer was needed in addition to a device buffer. </summary>
CET_PINNED_HOST_VIEW_CREATE = 14,
} COHERENCEEVENTTYPE;
///-------------------------------------------------------------------------------------------------
/// <summary> Defines a structure for collecting detailed data for
/// a coherence state transition. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///-------------------------------------------------------------------------------------------------
typedef struct COHERENCETRANSITION_t {
public:
/// <summary> True if this transition has completed and this record should
/// no longer be allowed to change. Set to TRUE by Finalize().
/// </summary>
BOOL bFinalized;
/// <summary> True if a data transfer occurred for this transition. Assigned from the
/// bTransfer argument of Finalize(). </summary>
BOOL bXferOccurred;
/// <summary> The timestamp at the start of the transition. </summary>
double nStartTimestamp;
/// <summary> The timestamp at the end of the transition. Assigned by Finalize(). </summary>
double nEndTimestamp;
/// <summary> Identifier for the source memory space. </summary>
UINT uiSrcMemorySpaceId;
/// <summary> Identifier for the destination memory space. </summary>
UINT uiDstMemorySpaceId;
/// <summary> The event that triggered this transition. </summary>
COHERENCEEVENTTYPE eTriggerEvent;
/// <summary> The requested state of the block in response to the event. </summary>
BUFFER_COHERENCE_STATE eTargetState;
/// <summary> The start state of the block (snapshot of the state per memory space);
/// holds MAX_MEMORY_SPACES entries. </summary>
BUFFER_COHERENCE_STATE eStartState[MAX_MEMORY_SPACES];
/// <summary> The end state of the block (snapshot of the state per memory space);
/// holds MAX_MEMORY_SPACES entries. </summary>
BUFFER_COHERENCE_STATE eEndState[MAX_MEMORY_SPACES];
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///-------------------------------------------------------------------------------------------------
COHERENCETRANSITION_t(
double dStartTimestamp
)
{
bFinalized = FALSE;
bXferOccurred = FALSE;
nStartTimestamp = dStartTimestamp;
eTriggerEvent = CET_UNSPECIFIED;
eTargetState = BSTATE_NO_ENTRY;
uiSrcMemorySpaceId = HOST_MEMORY_SPACE_ID;
uiDstMemorySpaceId = HOST_MEMORY_SPACE_ID;
for(int i=0; i<MAX_MEMORY_SPACES; i++) {
eStartState[i] = BSTATE_NO_ENTRY;
eEndState[i] = BSTATE_NO_ENTRY;
}
}
///-------------------------------------------------------------------------------------------------
/// <summary> Finalizes this record. </summary>
///
/// <remarks> Crossbac, 9/19/2012. </remarks>
///
/// <param name="dEndTimestamp"> The end timestamp. </param>
///-------------------------------------------------------------------------------------------------
void
Finalize(
double dEndTimestamp,
BOOL bTransfer
)
{
nEndTimestamp = dEndTimestamp;
bFinalized = TRUE;
bXferOccurred = bTransfer;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Counts the memory spaces whose entry in the snapshot is valid, i.e. is either
///             BSTATE_SHARED or BSTATE_EXCLUSIVE. The scan starts at the host memory space
///             and covers every space reported by MemorySpace::GetNumberOfMemorySpaces().
///             </summary>
///
/// <remarks>   Crossbac, 9/18/2012. </remarks>
///
/// <param name="pSnapshot">    [in] Per-memory-space coherence states, indexed by memory
///                             space id. </param>
///
/// <returns>   The number of valid copies. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetNumberOfValidCopies(
    __in BUFFER_COHERENCE_STATE * pSnapshot
    )
{
    UINT uiValidCopies = 0;
    const UINT uiSpaceCount = MemorySpace::GetNumberOfMemorySpaces();
    for(UINT uiSpace = HOST_MEMORY_SPACE_ID; uiSpace < uiSpaceCount; ++uiSpace) {
        const BUFFER_COHERENCE_STATE eState = pSnapshot[uiSpace];
        // only shared and exclusive entries hold usable data
        if(eState == BSTATE_SHARED || eState == BSTATE_EXCLUSIVE) {
            ++uiValidCopies;
        }
    }
    return uiValidCopies;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Check that all the memory spaces have compatible states in the snapshot: either
///             no space holds an exclusive copy, or exactly one space holds an exclusive copy
///             and no space holds a shared copy. </summary>
///
/// <remarks>   Crossbac, 9/18/2012. Asserts (in builds where assert is active) when the
///             snapshot is not valid. Only shared and exclusive entries influence the result,
///             so no-entry and invalid entries are not counted. </remarks>
///
/// <param name="pSnapshot">    [in] Per-memory-space coherence states, indexed by memory
///                             space id. </param>
///
/// <returns>   true if the snapshot is valid, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
ValidState(
    __in BUFFER_COHERENCE_STATE * pSnapshot
    )
{
    UINT nExclusiveCopies = 0;
    UINT nSharedCopies = 0;
    UINT nMemSpaces = MemorySpace::GetNumberOfMemorySpaces();
    for(UINT i=HOST_MEMORY_SPACE_ID; i<nMemSpaces; i++) {
        // count up number of copies in the states that matter for validity.
        switch(pSnapshot[i]) {
        case BSTATE_SHARED:     nSharedCopies++; break;
        case BSTATE_EXCLUSIVE:  nExclusiveCopies++; break;
        default:                break;
        }
    }
    // any number of shared copies (including zero) is fine as long as nothing is exclusive
    BOOL bCorrectSharedState = (nExclusiveCopies == 0);
    BOOL bCorrectExclusiveState = (nSharedCopies == 0 && nExclusiveCopies == 1);
    BOOL bValid = bCorrectSharedState || bCorrectExclusiveState;
    assert(bValid);
    return bValid;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Collapses a per-memory-space snapshot into a single coherence state, taking the
///             strongest state present: exclusive, then shared, then invalid, and finally
///             no-entry when no space has any entry. </summary>
///
/// <remarks>   Crossbac, 9/18/2012. BSTATE_INVALID entries are what make the result
///             BSTATE_INVALID; BSTATE_NO_ENTRY entries contribute nothing, so a snapshot with
///             only no-entry slots yields BSTATE_NO_ENTRY. Asserts (in builds where assert is
///             active) that the snapshot passes ValidState(). </remarks>
///
/// <param name="pSnapshot">    [in] Per-memory-space coherence states, indexed by memory
///                             space id. </param>
///
/// <returns>   The collective state of the snapshot. </returns>
///-------------------------------------------------------------------------------------------------
BUFFER_COHERENCE_STATE
GetCollectiveState(
    __in BUFFER_COHERENCE_STATE * pSnapshot
    )
{
    assert(ValidState(pSnapshot));
    UINT nInvalidEntries = 0;
    UINT nExclusiveCopies = 0;
    UINT nSharedCopies = 0;
    UINT nMemSpaces = MemorySpace::GetNumberOfMemorySpaces();
    for(UINT i=HOST_MEMORY_SPACE_ID; i<nMemSpaces; i++) {
        // count up number of copies in various states.
        switch(pSnapshot[i]) {
        case BSTATE_INVALID:    nInvalidEntries++; break;
        case BSTATE_SHARED:     nSharedCopies++; break;
        case BSTATE_EXCLUSIVE:  nExclusiveCopies++; break;
        default:                break;  // BSTATE_NO_ENTRY: nothing to count
        }
    }
    if(nExclusiveCopies > 0) return BSTATE_EXCLUSIVE;
    if(nSharedCopies > 0) return BSTATE_SHARED;
    if(nInvalidEntries > 0) return BSTATE_INVALID;
    return BSTATE_NO_ENTRY;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the collective state of the block at the start of the transition, computed
///             from the eStartState snapshot by GetCollectiveState(). </summary>
///
/// <remarks>   Crossbac, 9/18/2012. </remarks>
///
/// <returns>   The start state. </returns>
///-------------------------------------------------------------------------------------------------
BUFFER_COHERENCE_STATE
GetStartState(
    VOID
    )
{
    BUFFER_COHERENCE_STATE eCollectiveStart = GetCollectiveState(eStartState);
    return eCollectiveStart;
}
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the collective coherence state at the end of the transition, i.e.
///           the result of GetCollectiveState applied to the eEndState snapshot. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///
/// <returns> The collective final state. </returns>
///-------------------------------------------------------------------------------------------------
BUFFER_COHERENCE_STATE
GetFinalState(
VOID
)
{
return GetCollectiveState(eEndState);
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Count the valid (SHARED or EXCLUSIVE) entries in a snapshot, looking only
///             at memory spaces whose id is greater than HOST_MEMORY_SPACE_ID. </summary>
///
/// <remarks>   Crossbac, 9/18/2012. </remarks>
///
/// <param name="pSnapshot">    [in] Snapshot indexed by memory space id. </param>
///
/// <returns>   The number of non-host entries in state BSTATE_SHARED or BSTATE_EXCLUSIVE. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetNumberOfValidAcceleratorCopies(
    __in BUFFER_COHERENCE_STATE * pSnapshot
    )
{
    UINT nValidCopies = 0;
    const UINT nSpaceCount = MemorySpace::GetNumberOfMemorySpaces();
    // skip the host slot: iteration begins one past HOST_MEMORY_SPACE_ID.
    for(UINT uiSpace = HOST_MEMORY_SPACE_ID + 1; uiSpace < nSpaceCount; ++uiSpace) {
        const BUFFER_COHERENCE_STATE eState = pSnapshot[uiSpace];
        // NO_ENTRY and INVALID slots hold no usable copy and are not counted.
        if(eState == BSTATE_SHARED || eState == BSTATE_EXCLUSIVE)
            ++nValidCopies;
    }
    return nValidCopies;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Report whether the host slot of a snapshot holds a valid copy. </summary>
///
/// <remarks>   Crossbac, 9/18/2012. </remarks>
///
/// <param name="pSnapshot">    [in] Snapshot indexed by memory space id; only the entry at
///                             HOST_MEMORY_SPACE_ID is read. </param>
///
/// <returns>   1 if the host entry is BSTATE_SHARED or BSTATE_EXCLUSIVE, otherwise 0. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetNumberOfValidHostCopies(
    __in BUFFER_COHERENCE_STATE * pSnapshot
    )
{
    const BUFFER_COHERENCE_STATE eHostState = pSnapshot[HOST_MEMORY_SPACE_ID];
    if(eHostState == BSTATE_SHARED || eHostState == BSTATE_EXCLUSIVE)
        return 1;
    // NO_ENTRY, INVALID, or any unexpected value: no valid host copy.
    return 0;
}
///-------------------------------------------------------------------------------------------------
/// <summary> Number of valid copies in non-host memory spaces at the start of the
///           transition: GetNumberOfValidAcceleratorCopies applied to eStartState. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///
/// <returns> The count of valid accelerator copies in the start snapshot. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetStartNumberOfValidAcceleratorCopies(
void
)
{
return GetNumberOfValidAcceleratorCopies(eStartState);
}
///-------------------------------------------------------------------------------------------------
/// <summary> Number of valid copies in non-host memory spaces at the end of the
///           transition: GetNumberOfValidAcceleratorCopies applied to eEndState. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///
/// <returns> The count of valid accelerator copies in the end snapshot. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetFinalNumberOfValidAcceleratorCopies(
void
)
{
return GetNumberOfValidAcceleratorCopies(eEndState);
}
///-------------------------------------------------------------------------------------------------
/// <summary> Number of valid host copies at the start of the transition:
///           GetNumberOfValidHostCopies applied to eStartState. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///
/// <returns> 1 if the host entry of the start snapshot is valid, otherwise 0. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetStartNumberOfValidHostCopies(
void
)
{
return GetNumberOfValidHostCopies(eStartState);
}
///-------------------------------------------------------------------------------------------------
/// <summary> Number of valid host copies at the end of the transition:
///           GetNumberOfValidHostCopies applied to eEndState. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///
/// <returns> 1 if the host entry of the end snapshot is valid, otherwise 0. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetFinalNumberOfValidHostCopies(
void
)
{
return GetNumberOfValidHostCopies(eEndState);
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Decide whether this transition was a Host -> Device transfer. </summary>
///
/// <remarks>   Crossbac, 9/20/2012. A transfer only qualifies when the host could have been
///             the source: either a valid host view existed at the start, or the recorded
///             source memory space is the host. On top of that, a SHARED final state must
///             have gained accelerator views, and an EXCLUSIVE final state must have at
///             least one valid accelerator view. </remarks>
///
/// <returns>   TRUE if the transition looks like an H->D transfer, FALSE otherwise
///             (including when no transfer occurred at all). </returns>
///-------------------------------------------------------------------------------------------------
BOOL
IsHToDXfer(
    VOID
    )
{
    if(!bXferOccurred)
        return FALSE;

    const UINT nHostStart = GetStartNumberOfValidHostCopies();
    const UINT nAccStart  = GetStartNumberOfValidAcceleratorCopies();
    const UINT nAccFinal  = GetFinalNumberOfValidAcceleratorCopies();
    const BUFFER_COHERENCE_STATE eFinal = GetFinalState();

    // the host is a plausible source if it had a valid view, or was named as the source.
    const bool bHostIsSource =
        (nHostStart > 0) || (uiSrcMemorySpaceId == HOST_MEMORY_SPACE_ID);

    if(eFinal == BSTATE_SHARED)
        return (nAccFinal > nAccStart) && bHostIsSource;
    if(eFinal == BSTATE_EXCLUSIVE)
        return (nAccFinal > 0) && bHostIsSource;

    // why transfer if there is no buffer, or only to create an invalid entry?
    assert(eFinal != BSTATE_NO_ENTRY && eFinal != BSTATE_INVALID);
    return FALSE;
}
///-------------------------------------------------------------------------------------------------
/// <summary>   Decide whether this transition was a Device -> Host transfer. </summary>
///
/// <remarks>   Crossbac, 9/20/2012. In both accepted final states the host must end up with
///             a valid view. For a SHARED final state the host must also have started
///             without one while some accelerator view was valid; for an EXCLUSIVE final
///             state the number of valid accelerator views must have dropped. </remarks>
///
/// <returns>   TRUE if the transition looks like a D->H transfer, FALSE otherwise
///             (including when no transfer occurred at all). </returns>
///-------------------------------------------------------------------------------------------------
BOOL
IsDToHXfer(
    VOID
    )
{
    if(!bXferOccurred)
        return FALSE;

    const UINT nHostStart = GetStartNumberOfValidHostCopies();
    const UINT nHostFinal = GetFinalNumberOfValidHostCopies();
    const UINT nAccStart  = GetStartNumberOfValidAcceleratorCopies();
    const UINT nAccFinal  = GetFinalNumberOfValidAcceleratorCopies();
    const BUFFER_COHERENCE_STATE eFinal = GetFinalState();

    if(eFinal == BSTATE_SHARED) {
        // host went from no valid view to a valid one while a device view existed.
        return (nAccStart > 0) && (nHostStart == 0) && (nHostFinal > 0);
    }
    if(eFinal == BSTATE_EXCLUSIVE) {
        // device views were given up and the host now holds a valid view.
        return (nAccFinal < nAccStart) && (nHostFinal > 0);
    }

    // why transfer if there is no buffer, or only to create an invalid entry?
    assert(eFinal != BSTATE_NO_ENTRY && eFinal != BSTATE_INVALID);
    return FALSE;
}
///-------------------------------------------------------------------------------------------------
/// <summary> Was this transfer a Device -> Device transfer? </summary>
///
/// <remarks> Crossbac, 9/20/2012. Requires that a transfer occurred and that at least one
///           valid accelerator view exists both at the start and at the end of the
///           transition; the remaining decision depends on the collective final state
///           (and, for a SHARED final state, on the collective start state). </remarks>
///
/// <returns> true if d to d xfer, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
IsDToDXfer(
VOID
)
{
if(!bXferOccurred) return FALSE;
UINT nValidHostViewsS = GetStartNumberOfValidHostCopies();
UINT nValidAccViewsS = GetStartNumberOfValidAcceleratorCopies();
UINT nValidAccViewsF = GetFinalNumberOfValidAcceleratorCopies();
if(nValidAccViewsS == 0) return FALSE; // no valid start device view to xfer
if(nValidAccViewsF == 0) return FALSE; // no valid end device view
switch(GetFinalState()) {
case BSTATE_NO_ENTRY: assert(FALSE); break; // why transfer if there is no buffer?
case BSTATE_INVALID: assert(FALSE); break; // why transfer to create an invalid entry?
case BSTATE_SHARED:
// if the final state is shared, and there
// was a valid device view to begin with
// then the number of device views must be strictly increasing.
// Otherwise, either no new dev view was created (meaning no X->D xfer) or
// some device view had to have been invalidated, which our system would not do.
if(nValidAccViewsF <= nValidAccViewsS) return FALSE; // no additional device views
switch(GetStartState()) {
case BSTATE_NO_ENTRY: return FALSE;
case BSTATE_INVALID: return FALSE;
case BSTATE_SHARED:
// copy could come from host or device.
if(nValidHostViewsS == 0) return TRUE; // *had* to come from device
// a valid host view existed too, so fall back on the recorded source space.
return uiSrcMemorySpaceId != HOST_MEMORY_SPACE_ID;
case BSTATE_EXCLUSIVE:
// there was only one copy to begin with so
// the source had to be device if there was a valid device view.
// (nValidAccViewsS > 0 is already guaranteed by the guard at the top,
// so this evaluates to true on this path.)
return nValidAccViewsS > 0;
}
// only reached if GetStartState() yields a value not handled by the inner
// switch; both conditions below were already established on this path.
return nValidAccViewsS > 0 && nValidAccViewsF > nValidAccViewsS;
case BSTATE_EXCLUSIVE:
// if the final state is exclusive, then
// the mem space in which we have a valid view must have changed.
// we would only do a D->D transfer if there was not a valid host
// view available, since (with some obvious exceptions), we generally
// must do D->D transfers through the host, so would prefer a host
// view if it was available.
return nValidHostViewsS == 0;
}
// NO_ENTRY / INVALID final states (after the assert) end up here.
return FALSE;
}
} COHERENCETRANSITION;
///-------------------------------------------------------------------------------------------------
/// <summary>   Defines a structure for tracking per-datablock instance
///             history of coherence traffic participation. If the PROFILE_MIGRATION
///             compiler directive is selected, each datablock will maintain
///             its own history in this structure, and each history will be merged
///             into the static view defined below upon deletion.
///
///             The structure owns its six history maps and the COHERENCETRANSITION
///             objects stored in pvStateHistory (all are deleted by the destructor).
///             Port and Task pointers stored in the bind histories are not deleted here.
///             Because of that ownership the type is deliberately non-copyable. </summary>
///
/// <remarks>   Crossbac, 9/18/2012. </remarks>
///-------------------------------------------------------------------------------------------------
typedef struct COHERENCEHISTORY_t {
public:
    /// <summary> History of all ports to which a block has been bound.
    ///           Keyed by an __int64 (presumably a timestamp -- TODO confirm). </summary>
    std::map<__int64, Port*>* pvPortBindHistory;
    /// <summary> History of all IO consumer ports to which a block has been bound. </summary>
    std::map<__int64, Port*>* pvIOCPortBindHistory;
    /// <summary> The set of all tasks which have touched this block. </summary>
    std::map<__int64, Task*>* pvTaskBindHistory;
    /// <summary> The accelerator bind history (tracked as accelerator id). </summary>
    std::map<__int64, UINT>* pvAcceleratorBindHistory;
    /// <summary> The dependent-accelerator bind history (tracked as accelerator id).
    ///           NOTE(review): "dependent" is read off the member name -- confirm. </summary>
    std::map<__int64, UINT>* pvDepAcceleratorBindHistory;
    /// <summary> The coherence state history. Entries are owned by this object. </summary>
    std::map<__int64, COHERENCETRANSITION*>* pvStateHistory;
    /// <summary> The dbuid of the datablock for which this history was recorded. </summary>
    UINT uiDBUID;
    /// <summary> The number of times this block required D->H xfer. </summary>
    LONG nDToHCopies;
    /// <summary> The number of times this block required H->D xfer. </summary>
    LONG nHToDCopies;
    /// <summary> The number of times this block required D->D xfer. </summary>
    LONG nDToDCopies;
    /// <summary> The number of times this block required H->H xfer.
    ///           This is a sanity check--it better be 0!
    /// </summary>
    LONG nHToHCopies;
    /// <summary> The total number of bytes transferred over the life cycle of
    ///           this datablock. </summary>
    LONG nTotalSyncBytes;
    /// <summary> The number of times a block was bound concurrently
    ///           to multiple ports. This may have some error due to
    ///           the resolution of the timer, and may need to be revised. </summary>
    UINT uiConcurrentPortBindings;
    /// <summary> The number of times a block was bound concurrently
    ///           to multiple tasks. This may have some error due to
    ///           the resolution of the timer, and may need to be revised. </summary>
    UINT uiConcurrentTaskBindings;

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Constructor. Allocates empty history maps and zeroes all counters. </summary>
    ///
    /// <remarks>   Crossbac, 9/18/2012. </remarks>
    ///
    /// <param name="uiDatablockID">    Identifier of the datablock this history describes;
    ///                                 stored in uiDBUID. </param>
    ///-------------------------------------------------------------------------------------------------
    COHERENCEHISTORY_t(
        UINT uiDatablockID
        ) :
        // initializers are listed in member declaration order.
        pvPortBindHistory(new std::map<__int64, Port*>()),
        pvIOCPortBindHistory(new std::map<__int64, Port*>()),
        pvTaskBindHistory(new std::map<__int64, Task*>()),
        pvAcceleratorBindHistory(new std::map<__int64, UINT>()),
        pvDepAcceleratorBindHistory(new std::map<__int64, UINT>()),
        pvStateHistory(new std::map<__int64, COHERENCETRANSITION*>()),
        uiDBUID(uiDatablockID),
        nDToHCopies(0),
        nHToDCopies(0),
        nDToDCopies(0),
        nHToHCopies(0),
        nTotalSyncBytes(0),
        uiConcurrentPortBindings(0),
        uiConcurrentTaskBindings(0)
    {
    }

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Destructor. Deletes every recorded state transition, then the maps. </summary>
    ///
    /// <remarks>   Crossbac, 9/18/2012. </remarks>
    ///-------------------------------------------------------------------------------------------------
    ~COHERENCEHISTORY_t(
        VOID
        )
    {
        // the transitions are owned by the history; release them before the map itself.
        std::map<__int64, COHERENCETRANSITION*>::iterator mi;
        for(mi=pvStateHistory->begin(); mi!=pvStateHistory->end(); ++mi)
            delete mi->second;
        delete pvPortBindHistory;
        delete pvIOCPortBindHistory;
        delete pvTaskBindHistory;
        delete pvStateHistory;
        delete pvAcceleratorBindHistory;
        delete pvDepAcceleratorBindHistory;
    }

private:
    // Non-copyable: a member-wise copy would share the owning pointers above and
    // both copies would delete them. Declared but intentionally never defined.
    COHERENCEHISTORY_t(const COHERENCEHISTORY_t&);
    COHERENCEHISTORY_t& operator=(const COHERENCEHISTORY_t&);
} COHERENCEHISTORY;
///-------------------------------------------------------------------------------------------------
/// <summary> Coherence traffic profiler. Each instance tracks a single Datablock and its
///           COHERENCEHISTORY; the static members hold counters, histories and name tables
///           shared by all instances, protected by m_csCoherenceProfiler. </summary>
///-------------------------------------------------------------------------------------------------
class CoherenceProfiler {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 7/19/2013. </remarks>
///
/// <param name="pDatablock"> [in,out] If non-null, the datablock. </param>
///-------------------------------------------------------------------------------------------------
CoherenceProfiler(Datablock * pDatablock);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 7/19/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~CoherenceProfiler();
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the coherence traffic profiler. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="bEnable"> true to enable, false to disable. </param>
/// <param name="bVerbose"> (optional) true for verbose output; defaults to FALSE. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL Initialize(BOOL bEnable, BOOL bVerbose=FALSE);
///-------------------------------------------------------------------------------------------------
/// <summary> Deinitializes the coherence traffic profiler. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL Deinitialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Dumps the coherence traffic statistics to the given stream. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="ios"> [in,out] The stream to write to. </param>
///-------------------------------------------------------------------------------------------------
static void Report(std::ostream& ios);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the coherence traffic statistics as a string stream. </summary>
///
/// <remarks> Crossbac, 2/24/2012.
///           NOTE(review): ownership of the returned pointer is not visible from this
///           declaration -- confirm whether the caller must delete it. </remarks>
///
/// <returns> A pointer to a string stream (presumably holding the report -- confirm). </returns>
///-------------------------------------------------------------------------------------------------
static std::stringstream * GetReport();
///-------------------------------------------------------------------------------------------------
/// <summary> Dumps the detailed coherence traffic statistics to the given stream. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="ios"> [in,out] The stream to write to. </param>
///-------------------------------------------------------------------------------------------------
static void GetDetailedReport(std::ostream& ios);
///-------------------------------------------------------------------------------------------------
/// <summary> Coherence tracker record view update start. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///
/// <param name="nDestMemorySpaceID"> Identifier for the destination memory space. </param>
/// <param name="eEventType"> Type of the event. </param>
///
/// <returns> new transition object. </returns>
///-------------------------------------------------------------------------------------------------
COHERENCETRANSITION *
RecordViewUpdateStart(
__in UINT nDestMemorySpaceID,
__in COHERENCEEVENTTYPE eEventType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Coherence tracker record view update end. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///
/// <param name="nSrcMemorySpaceID"> Identifier for the source memory space. </param>
/// <param name="uiRequestedState"> The requested coherence state. This affects whether other
/// accelerator views require invalidation. </param>
/// <param name="bTransferOccurred"> True if a transfer occurred. </param>
/// <param name="pTx"> non-null, the state transition descriptor. </param>
///-------------------------------------------------------------------------------------------------
void
RecordViewUpdateEnd(
__in UINT nSrcMemorySpaceID,
__in BUFFER_COHERENCE_STATE uiRequestedState,
__in BOOL bTransferOccurred,
__in COHERENCETRANSITION * pTx
);
///-------------------------------------------------------------------------------------------------
/// <summary> Record port binding. </summary>
///
/// <remarks> Crossbac, 9/19/2012. </remarks>
///
/// <param name="pPort"> [in] If non-null, the port the block will occupy. </param>
///-------------------------------------------------------------------------------------------------
void RecordPortBinding(Port * pPort);
///-------------------------------------------------------------------------------------------------
/// <summary> Record task binding. </summary>
///
/// <remarks> Crossbac, 9/19/2012. </remarks>
///
/// <param name="pTask"> [in] If non-null, the task the block is being bound to. </param>
///-------------------------------------------------------------------------------------------------
void RecordTaskBinding(Task * pTask);
///-------------------------------------------------------------------------------------------------
/// <summary> Record binding. </summary>
///
/// <remarks> Crossbac, 9/20/2012. </remarks>
///
/// <param name="pPort"> [in] If non-null, the port the block will occupy. </param>
/// <param name="pTask"> [in,out] If non-null, the task. </param>
/// <param name="pIOConsumer"> [in] If non-null, the IO consumer port
/// (presumably recorded in the IO consumer port history -- confirm). </param>
///-------------------------------------------------------------------------------------------------
void RecordBinding(Port * pPort, Task * pTask, Port * pIOConsumer);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets whether detailed statistics are collected. </summary>
///
/// <remarks> Crossbac, 9/21/2012. </remarks>
///
/// <param name="bDetailed"> true to collect detailed stats. </param>
///-------------------------------------------------------------------------------------------------
void SetDetailed(BOOL bDetailed);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the coherence history for this block. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void InitializeInstanceHistory();
///-------------------------------------------------------------------------------------------------
/// <summary> Deinitializes the coherence history for this block. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void DeinitializeInstanceHistory();
///-------------------------------------------------------------------------------------------------
/// <summary> Merge the coherence history for this block with the static view. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void MergeHistory();
///-------------------------------------------------------------------------------------------------
/// <summary> Merge the coherence histories for all blocks into the static view. </summary>
///
/// <remarks> Crossbac, 9/18/2012. </remarks>
///-------------------------------------------------------------------------------------------------
static void MergeHistories();
/// <summary> The datablock this profiler object is tracking. </summary>
Datablock * m_pDatablock;
/// <summary> Coherence history and statistics for this block, including:
/// 1. all ports to which this block has been bound.
/// 2. The set of all tasks which have touched this block
/// 3. The number of times this block required D->H xfer.
/// 4. The number of times this block required H->D xfer.
/// 5. The number of times this block required D->D xfer.
/// 6. The number of times this block required H->H xfer.
/// 7. The total number of bytes transferred over the life cycle of
/// this datablock.
/// 8. The history of state transitions. </summary>
COHERENCEHISTORY* m_pCoherenceHistory;
/// <summary> True if we are in the middle of recording a state transition
/// in the coherence profiler. Helps us catch situations where we
/// accidentally attempt nested recording of transitions, which
/// would corrupt the results.
/// </summary>
BOOL m_bCoherenceProfilerTransitionActive;
/// <summary> The dev to dev migrations with invalidation. </summary>
static LONG m_nDToDMigrationsExclusive;
/// <summary> The dev to dev migrations with shared state. </summary>
static LONG m_nDToDMigrationsShared;
/// <summary> The host to dev migrations with invalidation. </summary>
static LONG m_nHToDMigrationsExclusive;
/// <summary> The host to dev migrations without invalidation. </summary>
static LONG m_nHToDMigrationsShared;
/// <summary> The dev to host migrations with invalidation. </summary>
static LONG m_nDToHMigrationsExclusive;
/// <summary> The dev to host migrations without invalidation. </summary>
static LONG m_nDToHMigrationsShared;
/// <summary> The number of times a coherence event caused multiple
/// valid views to be abandoned. </summary>
static LONG m_nMultiViewInvalidations;
/// <summary> The number of state transitions whose cause was unspecified. </summary>
static LONG m_nCETUnspecified;
/// <summary> The number of state transitions triggered by a binding to task input. </summary>
static LONG m_nCETBindInput;
/// <summary> The number of state transitions triggered by a binding to task output. </summary>
static LONG m_nCETBindOutput;
/// <summary> The number of state transitions triggered by a binding to a task constant port. </summary>
static LONG m_nCETBindConstant;
/// <summary> The number of state transitions triggered by pushing into multiple consumer channels. </summary>
static LONG m_nCETDownstreamShare;
/// <summary> The number of state transitions triggered by a user request for a pointer in host space. </summary>
static LONG m_nCETPointerRequest;
/// <summary> The number of state transitions triggered by the deletion of the block. </summary>
static LONG m_nCETBlockDelete;
/// <summary> The number of state transitions triggered by the cloning of the block. </summary>
static LONG m_nCETBlockClone;
/// <summary> The number of state transitions triggered by block allocation. </summary>
static LONG m_nCETBlockCreate;
/// <summary> The number of state transitions triggered when we are updating the host view of
/// the block, but don't actually have access to the information we need to figure
/// out what action triggered the view update. Most likely a user request.
/// </summary>
static LONG m_nCETHostViewUpdate;
/// <summary> The number of state transitions triggered when we are updating the device view of
/// the block, but don't actually have access to the information we need to figure
/// out what action triggered the view update. Most likely a user request.
/// </summary>
static LONG m_nCETAcceleratorViewUpdate;
/// <summary> The number of state transitions triggered when buffers are being allocated for a
/// block.
/// </summary>
static LONG m_nCETBufferAllocate;
/// <summary> The number of state transitions triggered when a request to grow the buffer
/// caused some buffer reallocation and potentially view updates as a side effect.
/// </summary>
static LONG m_nCETGrowBuffer;
/// <summary> The number of state transitions triggered when a request to synthesize
/// a metadata block caused the traffic. </summary>
static LONG m_nCETSynthesizeBlock;
/// <summary> The number of state transitions triggered when we
/// needed a pinned host buffer in addition to a dev buffer. </summary>
static LONG m_nCETPinnedHostView;
/// <summary> Is the profiler initialized? (stored as a LONG flag) </summary>
static LONG m_nCoherenceProfilerInit;
/// <summary> Is the profiler enabled? (stored as a LONG flag) </summary>
static LONG m_nCoherenceProfilerEnabled;
/// <summary> true if the coherence tracker should emit copious text. </summary>
static BOOL m_bCoherenceProfilerVerbose;
/// <summary> true if detailed statistics should be collected. </summary>
static BOOL m_bCoherenceStatisticsDetailed;
/// <summary> The merged histories, keyed by a UINT id (presumably the datablock UID
/// stored in COHERENCEHISTORY::uiDBUID -- TODO confirm). </summary>
static std::map<UINT, COHERENCEHISTORY*> m_vHistories;
/// <summary> The timer. </summary>
static CHighResolutionTimer * m_pTimer;
/// <summary> Map from task pointer to task name. Required because we will no longer have
/// valid task pointers when we check for leaks (all tasks *should* be
/// deleted by that point), and we want to be able to find the task
/// that allocated a block if it was leaked and provide its name as
/// a debug assist.
/// </summary>
static std::map<PTask::Task*, std::string> m_vTaskNames;
/// <summary> Map from port pointer to port name. Required because we will no longer have
/// valid port pointers when we check for leaks (all ports *should* be
/// deleted by that point), and we want to be able to find the last
/// port that touched any leaked blocks.
/// </summary>
static std::map<PTask::Port*, std::string> m_vPortNames;
/// <summary> The coherence profiler lock. Protects the static data structures
/// collecting data xfer statistics.
/// </summary>
static CRITICAL_SECTION m_csCoherenceProfiler;
// Datablock is granted access to the protected members above.
friend class Datablock;
};
};
#endif

Просмотреть файл

@ -1,256 +0,0 @@
//--------------------------------------------------------------------------------------
// File: CompiledKernel.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _COMPILED_KERNEL_H_
#define _COMPILED_KERNEL_H_
#include "accelerator.h"
#include "ptlock.h"
#include <map>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Function pointer type for a task initializer routine: a __stdcall function
///           that receives a thread id and a device id and returns nothing.
///           NOTE(review): presumably this is the signature expected of the entry point
///           named by CompiledKernel's lpszInitializerEntryPoint -- confirm against the
///           code that loads the initializer binary.
/// </summary>
///
/// <remarks> Crossbac, 5/16/2012. </remarks>
///
/// <param name="dwThreadId"> Thread id passed to the initializer. </param>
/// <param name="nDeviceId"> Device id passed to the initializer; presumably a
///                          platform-specific id -- confirm. </param>
///-------------------------------------------------------------------------------------------------
typedef void (__stdcall *LPFNTASKINITIALIZER)(DWORD dwThreadId, int nDeviceId);
class CompiledKernel
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="lpszSourceFile"> [in] non-null, source file. </param>
/// <param name="lpszOperation"> [in] non-null, the operation. </param>
/// <param name="lpszInitializerBinary"> [in,out] If non-null, the initializer binary. </param>
/// <param name="lpszInitializerEntryPoint"> [in,out] If non-null, the initializer entry
/// point. </param>
/// <param name="eInitializerPSObjectClass"> (Optional) the initializer ps object class. </param>
///-------------------------------------------------------------------------------------------------
CompiledKernel(
__in char * lpszSourceFile,
__in char * lpszOperation,
__in char * lpszInitializerBinary,
__in char * lpszInitializerEntryPoint,
__in ACCELERATOR_CLASS eInitializerPSObjectClass=ACCELERATOR_CLASS_UNKNOWN
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~CompiledKernel(void);
///-------------------------------------------------------------------------------------------------
/// <summary>
/// Gets the platform specific binary associated with the given accelerator. Generally
/// speaking, we will compile a kernel separately for every accelerator in the system capable
/// of running it, since the accelerators may have different capabilities. This method
/// retrieves the result of that compilation, which is an object whose type depends on the
/// platform supported by the accelerator. For example, in directX, this retrieves a compute
/// shader interface.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
///
/// <returns> null if it fails, else the platform specific binary. </returns>
///-------------------------------------------------------------------------------------------------
virtual void * GetPlatformSpecificBinary(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a platform specific binary. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
/// <param name="pPlatformSpecificBinary"> [in] non-null, the platform specific binary. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetPlatformSpecificBinary(Accelerator * pAccelerator, void * pPlatformSpecificBinary);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a platform specific module. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
///
/// <returns> null if it fails, else the platform specific module. </returns>
///-------------------------------------------------------------------------------------------------
virtual void * GetPlatformSpecificModule(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets the platform specific module associated with the given accelerator. </summary>
///
/// <remarks>   Crossbac, 12/23/2011.
///             NOTE(review): presumably recorded per accelerator in m_vPlatformSpecificModules;
///             confirm against the implementation. </remarks>
///
/// <param name="pAccelerator">             [in] non-null, the accelerator. </param>
/// <param name="pPlatformSpecificModule">  [in] If non-null, the platform specific module. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetPlatformSpecificModule(Accelerator * pAccelerator, void * pPlatformSpecificModule);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the source file for this kernel, as a C string. </summary>
///
/// <remarks>   Crossbac, 12/23/2011.
///             NOTE(review): presumably returns m_lpszSourceFile; the returned pointer is
///             const, so callers must not modify or free it -- confirm ownership. </remarks>
///
/// <returns>   null if it fails, else the source file. </returns>
///-------------------------------------------------------------------------------------------------
virtual const char * GetSourceFile();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the operation. The operation is the top-level entry
///             point into kernel code, and must be specified, since a single
///             source file may contain many such entry points.
///             </summary>
///
/// <remarks>   Crossbac, 12/23/2011.
///             NOTE(review): presumably returns m_lpszOperation -- confirm. </remarks>
///
/// <returns>   null if it fails, else the operation (entry point name). </returns>
///-------------------------------------------------------------------------------------------------
virtual const char * GetOperation();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the binary that contains the initializer routine, as a C string. </summary>
///
/// <remarks>   Crossbac, 12/23/2011.
///             NOTE(review): presumably returns m_lpszInitializerBinary -- confirm. </remarks>
///
/// <returns>   null if it fails, else the initializer binary. </returns>
///-------------------------------------------------------------------------------------------------
virtual const char * GetInitializerBinary();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the entry point for any initializer routines, as a C string.
///             </summary>
///
/// <remarks>   Crossbac, 12/23/2011.
///             NOTE(review): presumably returns m_lpszInitializerEntryPoint -- confirm. </remarks>
///
/// <returns>   null if it fails, else the initializer entry point. </returns>
///-------------------------------------------------------------------------------------------------
virtual const char * GetInitializerEntryPoint();
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets the initializer binary from an already-loaded module handle. </summary>
///
/// <remarks>   crossbac, 8/13/2013.
///             NOTE(review): the matching member m_lpvInitializerModule is declared as HANDLE
///             while this setter takes an HMODULE -- confirm the conversion in the
///             implementation. </remarks>
///
/// <param name="hModule">  The module that contains the initializer routine. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetInitializerBinary(HMODULE hModule);
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets the initializer entry point from a procedure address. </summary>
///
/// <remarks>   crossbac, 8/13/2013.
///             NOTE(review): presumably stored in m_lpvInitializerProcAddress -- confirm. </remarks>
///
/// <param name="lpvProcAddress">   [in] If non-null, the address of the initializer
///                                 procedure. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetInitializerEntryPoint(void * lpvProcAddress);
///-------------------------------------------------------------------------------------------------
/// <summary>   Query if this kernel has a static initializer that should be called as part
///             of putting the graph in the run state. </summary>
///
/// <remarks>   crossbac, 8/13/2013. </remarks>
///
/// <returns>   true if the kernel has a static initializer, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasStaticInitializer();
///-------------------------------------------------------------------------------------------------
/// <summary>   Determines if any present initializer routine requires platform-specific
///             device objects to be provided when it is called. </summary>
///
/// <remarks>   crossbac, 8/13/2013. </remarks>
///
/// <returns>   true if the initializer requires platform-specific device objects,
///             false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL InitializerRequiresPSObjects();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the accelerator class of the platform-specific objects required by the
///             initializer. </summary>
///
/// <remarks>   crossbac, 8/13/2013.
///             NOTE(review): presumably returns m_eInitializerPSObjectClass -- confirm. </remarks>
///
/// <returns>   The accelerator class required by the initializer. </returns>
///-------------------------------------------------------------------------------------------------
virtual ACCELERATOR_CLASS GetInitializerRequiredPSClass();
///-------------------------------------------------------------------------------------------------
/// <summary>   Executes the initializer, with a list of platform specific resources.
///             </summary>
///
/// <remarks>   crossbac, 8/13/2013. </remarks>
///
/// <param name="dwThreadId">       [in] Thread identifier.
///                                 NOTE(review): presumably the id of the invoking thread;
///                                 confirm against callers. </param>
/// <param name="vPSDeviceObjects"> [in] The set of accelerators (platform-specific device
///                                 objects) made available to the initializer. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
InvokeInitializer(
__in DWORD dwThreadId,
__in std::set<Accelerator*>& vPSDeviceObjects
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Executes the initializer, if present. This overload takes no
///             platform-specific device objects.
///             </summary>
///
/// <remarks>   crossbac, 8/13/2013. </remarks>
///
/// <param name="dwThreadId">   Thread identifier.
///                             NOTE(review): presumably the id of the invoking thread;
///                             confirm against callers. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL InvokeInitializer(DWORD dwThreadId);
protected:
// NOTE(review): the member descriptions below are inferred from the member names and
// from the accessor declarations in this class; confirm against the implementation.
/// <summary> The source file (C string). </summary>
char * m_lpszSourceFile;
/// <summary> The operation, i.e. the kernel entry point name (C string). </summary>
char * m_lpszOperation;
/// <summary> The initializer binary (C string). </summary>
char * m_lpszInitializerBinary;
/// <summary> The initializer entry point (C string). </summary>
char * m_lpszInitializerEntryPoint;
/// <summary> The accelerator class of platform-specific objects for the initializer. </summary>
ACCELERATOR_CLASS m_eInitializerPSObjectClass;
/// <summary> Platform-specific kernels (binaries), keyed by accelerator. </summary>
std::map<Accelerator *, void *> m_vPlatformSpecificKernels;
/// <summary> Platform-specific modules, keyed by accelerator. </summary>
std::map<Accelerator *, void *> m_vPlatformSpecificModules;
/// <summary> Handle of the module containing the initializer. </summary>
HANDLE m_lpvInitializerModule;
/// <summary> Address of the initializer procedure. </summary>
void * m_lpvInitializerProcAddress;
/// <summary> Flag tracking initializer invocation
///           (presumably TRUE once it has been invoked). </summary>
BOOL m_bInitializerInvoked;
/// <summary> Static (shared by all instances): module handles keyed by a string
///           (presumably the DLL name). </summary>
static std::map<std::string, HMODULE> m_vLoadedDlls;
/// <summary> Static (shared by all instances): procedure addresses in a two-level
///           string-keyed map (presumably DLL name, then entry point name). </summary>
static std::map<std::string, std::map<std::string, FARPROC>> m_vEntryPoints;
/// <summary> Static lock (presumably guarding the two static maps above). </summary>
static PTLock m_vModuleLock;
};
};
#endif

Просмотреть файл

@ -1,195 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: DXAsyncContext.h
//
// summary: Declares the DirectX asynchronous context class
///-------------------------------------------------------------------------------------------------
#ifndef __DX_ASYNC_CONTEXT_H__
#define __DX_ASYNC_CONTEXT_H__
#include "primitive_types.h"
#include "accelerator.h"
#include "dxaccelerator.h"
#include "task.h"
#include "channel.h"
#include <map>
#include <vector>
#include <list>
#include "hrperft.h"
#include "AsyncContext.h"
#include "AsyncDependence.h"
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary>   DirectX asynchronous context. Derives from AsyncContext, declares the
///             platform-specific sync-point operations for DirectX, and holds the
///             ID3D11DeviceContext pointer used by them.
///             NOTE(review): the PlatformSpecific* methods presumably override hooks
///             declared on AsyncContext -- that base class is not visible here. </summary>
///-------------------------------------------------------------------------------------------------
class DXAsyncContext : public AsyncContext {
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Constructor. </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <param name="pDeviceContext">       [in] If non-null, the accelerator providing the
///                                     device context. </param>
/// <param name="pTaskContext">         [in] If non-null, context for the task. </param>
/// <param name="eAsyncContextType">    [in] Type of the asynchronous context. </param>
///-------------------------------------------------------------------------------------------------
DXAsyncContext(
__in Accelerator * pDeviceContext,
__in Task * pTaskContext,
__in ASYNCCONTEXTTYPE eAsyncContextType
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destructor. </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~DXAsyncContext();
///-------------------------------------------------------------------------------------------------
/// <summary>   Initializes this object. </summary>
///
/// <remarks>   Crossbac, 5/25/2012. </remarks>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Initialize();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary>   Platform specific create synchronization point. </summary>
///
/// <remarks>   Crossbac, 5/25/2012. </remarks>
///
/// <param name="pPSSyncObject">    If non-null, the platform-specific sync object.
///                                 NOTE(review): presumably wrapped by the returned sync
///                                 point -- confirm against the implementation. </param>
///
/// <returns>   null if it fails, else the new sync point. </returns>
///-------------------------------------------------------------------------------------------------
virtual SyncPoint *
PlatformSpecificCreateSyncPoint(
void * pPSSyncObject
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Platform specific destroy synchronization point. </summary>
///
/// <remarks>   Crossbac, 5/25/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point to destroy. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificDestroySynchronizationPoint(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Platform specific synchronization of this context.
///             NOTE(review): presumably blocks until outstanding work on the context
///             has completed -- confirm against the implementation. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronizeContext(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Platform specific fence insertion (wait for dependence asynchronously). </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point for the fence. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificInsertFence(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Wait for dependence synchronously. </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point to wait on. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronousWait(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Determines if the sync point is resolved (and marks it if so). </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <param name="pSyncPoint">   [in,out] If non-null, the sync point to query. </param>
///
/// <returns>   A BOOL status.
///             NOTE(review): whether TRUE means "still outstanding" or "resolved" is not
///             visible from this declaration -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificQueryOutstanding(
__inout SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Platform specific nonblocking check whether the event remains outstanding. </summary>
///
/// <remarks>   crossbac, 7/2/2013. </remarks>
///
/// <param name="pSyncPoint">   [in,out] If non-null, the sync point to query. </param>
///
/// <returns>   A BOOL status.
///             NOTE(review): presumably TRUE while the event remains outstanding --
///             confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificNonblockingQueryOutstanding(
__inout SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Wait for dependence synchronously without locking the async context
///             or underlying accelerator: this simplifies lock acquisition for such
///             waits, but at the expense of leaving live dependences that are
///             actually resolved. </summary>
///
/// <remarks>   Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint">   [in] If non-null, the sync point to wait on. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificLocklessSynchronousWait(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the platform context object. </summary>
///
/// <remarks>   Crossbac, 5/25/2012.
///             NOTE(review): presumably returns m_pDXContext as a void pointer -- confirm. </remarks>
///
/// <returns>   null if it fails, else the platform context object. </returns>
///-------------------------------------------------------------------------------------------------
virtual void *
GetPlatformContextObject();
/// <summary>   The Direct3D 11 device context used by this async context. </summary>
ID3D11DeviceContext * m_pDXContext;
};
};
#endif

Просмотреть файл

@ -1,202 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: DatablockProfiler.h
//
// summary: Declares the datablock profiler class
///-------------------------------------------------------------------------------------------------
#ifndef __DATABLOCK_PROFILER_H__
#define __DATABLOCK_PROFILER_H__
#include "primitive_types.h"
#include "ReferenceCounted.h"
#include <map>
#include <set>
class CHighResolutionTimer;
namespace PTask {
class Port;
class Task;
class BlockPool;
class BlockPoolOwner;
class Datablock;
///-------------------------------------------------------------------------------------------------
/// <summary>   Datablock profiler. Each instance is attached to one Datablock and records the
///             ports, tasks and block pools that block has been bound to; static members
///             track allocation/deletion counts and the set of live datablocks so that
///             leaks can be reported. </summary>
///-------------------------------------------------------------------------------------------------
class DatablockProfiler {
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Constructor. </summary>
///
/// <remarks>   Crossbac, 7/19/2013. </remarks>
///
/// <param name="pDatablock">   [in] If non-null, the datablock this profiler is attached to. </param>
///-------------------------------------------------------------------------------------------------
DatablockProfiler(Datablock * pDatablock);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destructor. </summary>
///
/// <remarks>   Crossbac, 7/19/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~DatablockProfiler();
///-------------------------------------------------------------------------------------------------
/// <summary>   Initializes the datablock profiler. </summary>
///
/// <remarks>   Crossbac, 2/24/2012. </remarks>
///
/// <param name="bEnable">  true to enable the profiler. </param>
/// <param name="bVerbose"> (optional) true if the profiler should emit verbose output;
///                         defaults to FALSE. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL Initialize(BOOL bEnable, BOOL bVerbose=FALSE);
///-------------------------------------------------------------------------------------------------
/// <summary>   Deinitializes the datablock profiler. </summary>
///
/// <remarks>   Crossbac, 2/24/2012. </remarks>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL Deinitialize();
///-------------------------------------------------------------------------------------------------
/// <summary>   Dumps the datablock profiler's leak report. </summary>
///
/// <remarks>   Crossbac, 2/24/2012. </remarks>
///
/// <param name="ios">  [in,out] The stream to which the report is written. </param>
///-------------------------------------------------------------------------------------------------
static void Report(std::ostream& ios);
///-------------------------------------------------------------------------------------------------
/// <summary>   Profile allocation. </summary>
///
/// <remarks>   Crossbac, 2/24/2012. </remarks>
///
/// <param name="pBlock">   [in] If non-null, the block that was allocated. </param>
///-------------------------------------------------------------------------------------------------
static void RecordAllocation(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary>   Profile deletion. </summary>
///
/// <remarks>   Crossbac, 2/24/2012. </remarks>
///
/// <param name="pBlock">   [in] If non-null, the block being deleted. </param>
///-------------------------------------------------------------------------------------------------
static void RecordDeletion(Datablock*pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary>   Record port binding. </summary>
///
/// <remarks>   Crossbac, 9/19/2012. </remarks>
///
/// <param name="pPort">    [in] If non-null, the port the block will occupy. </param>
///-------------------------------------------------------------------------------------------------
void RecordBinding(Port * pPort);
///-------------------------------------------------------------------------------------------------
/// <summary>   Record task binding. </summary>
///
/// <remarks>   Crossbac, 9/19/2012. </remarks>
///
/// <param name="pTask">    [in] If non-null, the task that touches the block. </param>
///-------------------------------------------------------------------------------------------------
void RecordBinding(Task * pTask);
///-------------------------------------------------------------------------------------------------
/// <summary>   Record binding. </summary>
///
/// <remarks>   Crossbac, 9/20/2012. </remarks>
///
/// <param name="pPort">        [in] If non-null, the port the block will occupy. </param>
/// <param name="pTask">        [in] If non-null, the task. </param>
/// <param name="pIOConsumer">  [in] If non-null, a further port.
///                             NOTE(review): presumably the port consuming the block as
///                             an in/out argument -- confirm against callers. </param>
///-------------------------------------------------------------------------------------------------
void RecordBinding(Port * pPort, Task * pTask, Port * pIOConsumer);
///-------------------------------------------------------------------------------------------------
/// <summary>   Record pool binding. </summary>
///
/// <remarks>   Crossbac, 7/19/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void RecordPoolBinding();
/// <summary>   The set of all ports to which this block has been bound. </summary>
std::set<Port*> m_vPortBindings;
/// <summary>   The set of all tasks which have touched this block. </summary>
std::set<Task*> m_vTaskBindings;
/// <summary>   The set of pool owners with block pools used to allocate blocks.
///             Necessary because block pooling can cause blocks to be reused
///             between allocation and deletion. Maintained as a map to string
///             since the owner may be deleted by the time we attempt deletion.
///             </summary>
std::map<BlockPoolOwner*, std::string> m_vPools;
/// <summary>   Map from task pointer to task name. Required because we will no longer
///             have valid task pointers when we check for leaks (all tasks *should* be
///             deleted by that point), and we want to be able to find the task
///             that allocated a block if it was leaked and provide its name as
///             a debug assist.
///             </summary>
static std::map<PTask::Task*, std::string> m_vTaskNames;
/// <summary>   Map from port pointer to port name. Required because we will no longer
///             have valid port pointers when we check for leaks (all ports *should* be
///             deleted by that point), and we want to be able to find the last
///             port that touched any leaked blocks.
///             </summary>
static std::map<PTask::Port*, std::string> m_vPortNames;
protected:
/// <summary>   The datablock this profiler instance is attached to. </summary>
Datablock * m_pDatablock;
/// <summary>   The number of datablock allocations. </summary>
static LONG m_nDBAllocations;
/// <summary>   The datablock deletion count. </summary>
static LONG m_nDBDeletions;
/// <summary>   The number of clone allocations. </summary>
static LONG m_nDBCloneAllocations;
/// <summary>   The number of clone deletions. </summary>
static LONG m_nDBCloneDeletions;
/// <summary>   Is the profiler initialised? </summary>
static LONG m_nDBProfilerInit;
/// <summary>   Is the profiler enabled? </summary>
static LONG m_nDBProfilerEnabled;
/// <summary>   true if the allocation tracker should emit copious text. </summary>
static BOOL m_bDBProfilerVerbose;
/// <summary>   The set of datablocks currently allocated but not yet deleted. </summary>
static std::set<PTask::Datablock*> m_vAllAllocations;
// these structures are also needed by the coherence profiler.
// if both compile-time options are selected, then these are already
// defined by the time we get here
/// <summary>   The profiler lock. Protects the allocation counts,
///             the allocation set, and the port and task maps.
///             </summary>
static CRITICAL_SECTION m_csDBProfiler;
// Datablock is granted access to the protected members above.
friend class Datablock;
};
};
#endif

Просмотреть файл

@ -1,291 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: DeviceMemoryStatus.h
//
// summary: Declares the device memory status class
///-------------------------------------------------------------------------------------------------
#ifndef __DEVICE_MEMORY_STATUS_H__
#define __DEVICE_MEMORY_STATUS_H__
#include "primitive_types.h"
#include "Lockable.h"
#include <iostream>
#include <sstream>
#include <stdlib.h>
#include <map>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary>   Memory status for a memory type on a device.
///             Currently we track global and page-locked memory.
///             Could easily expand to track other types. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///-------------------------------------------------------------------------------------------------
typedef struct DeviceMemoryStatus_t {
/// <summary>   The name. </summary>
std::string m_name;
/// <summary>   A record of all allocations: maps the pointer to the size. </summary>
std::map<void *, unsigned __int64> m_vAllocations;
/// <summary>   The size in bytes of the memory space. </summary>
unsigned __int64 m_uiMemorySpaceSize;
/// <summary>   The size in bytes of the smallest allocated extent. </summary>
unsigned __int64 m_uiMinAllocExtentSize;
/// <summary>   The size in bytes of the largest allocated extent. </summary>
unsigned __int64 m_uiMaxAllocExtentSize;
/// <summary>   (historical) the low water mark for total allocated bytes. </summary>
unsigned __int64 m_uiLowWaterMarkBytes;
/// <summary>   (historical) the high water mark for total allocated bytes. </summary>
unsigned __int64 m_uiHighWaterMarkBytes;
/// <summary>   (current state) the total bytes currently allocated. </summary>
unsigned __int64 m_uiCurrentlyAllocatedBytes;
/// <summary>   (current state) the total number of currently allocated buffers. </summary>
unsigned __int64 m_uiCurrentlyAllocatedBuffers;
/// <summary>   The total number of allocation requests. </summary>
unsigned __int64 m_uiAllocationRequests;
/// <summary>   The total number of deallocation requests. </summary>
unsigned __int64 m_uiDeallocationRequests;
///-------------------------------------------------------------------------------------------------
/// <summary>   Constructor. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="szName">           [in] The name.
///                                 NOTE(review): presumably the device name used to build
///                                 m_name -- confirm against the implementation. </param>
/// <param name="lpszUniquifier">   [in] If non-null, a uniquifier string.
///                                 NOTE(review): presumably combined with szName to
///                                 distinguish memory types -- confirm. </param>
///-------------------------------------------------------------------------------------------------
DeviceMemoryStatus_t(
std::string &szName,
char * lpszUniquifier
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destructor. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///-------------------------------------------------------------------------------------------------
~DeviceMemoryStatus_t();
///-------------------------------------------------------------------------------------------------
/// <summary>   Resets this object. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void Reset(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Record a memory allocation. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="pMemoryExtent">    [in] If non-null, the allocated memory extent. </param>
/// <param name="uiBytes">          [in] The size of the allocation in bytes. </param>
///-------------------------------------------------------------------------------------------------
void
RecordAllocation(
__in void * pMemoryExtent,
__in size_t uiBytes
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Record a memory deallocation. We provide "require entry" flag to
///             simplify tracking of page-locked allocations which are a strict subset
///             of all allocations. If we are removing an entry from the global tracking,
///             we require that an entry for it be found, otherwise we complain. If
///             we are removing entries from the page-locked tracking, it is not an
///             error if there is no entry present.
///             </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="pMemoryExtent">    [in] If non-null, the memory extent being freed. </param>
/// <param name="bRequireEntry">    [in] true if a tracked entry for the extent must be
///                                 present (a missing entry is complained about); false
///                                 if a missing entry is not an error. </param>
///-------------------------------------------------------------------------------------------------
void
RecordDeallocation(
__in void * pMemoryExtent,
__in BOOL bRequireEntry
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Dumps the allocation statistics. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="ios">  [in,out] The stream to which the statistics are written. </param>
///-------------------------------------------------------------------------------------------------
void Report(
std::ostream &ios
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Updates the memory space size to the value given by uiBytes. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="uiBytes">  The memory space size in bytes. </param>
///-------------------------------------------------------------------------------------------------
void
UpdateMemorySpaceSize(
unsigned __int64 uiBytes
);
} MEMSTATEDESC;
///-------------------------------------------------------------------------------------------------
/// <summary>   Memory state for a device: aggregates one MEMSTATEDESC for global memory and
///             one for page-locked memory, together with a critical section exposed
///             through Lock()/Unlock(). </summary>
///-------------------------------------------------------------------------------------------------
typedef struct GlobalDeviceMemoryState_t {
///-------------------------------------------------------------------------------------------------
/// <summary>   Constructor. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="szDeviceName"> [in] Name of the device. </param>
///-------------------------------------------------------------------------------------------------
GlobalDeviceMemoryState_t(
std::string& szDeviceName
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destructor. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///-------------------------------------------------------------------------------------------------
~GlobalDeviceMemoryState_t(
VOID
);
/// <summary>   Synchronization: acquire / release the lock on this object.
///             NOTE(review): presumably enter/leave m_lock; whether the Record* and
///             Report methods take the lock themselves is not visible here. </summary>
void Lock();
void Unlock();
///-------------------------------------------------------------------------------------------------
/// <summary>   Resets the stats. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void Reset(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Record a memory allocation. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="pMemoryExtent">    [in] If non-null, the allocated memory extent. </param>
/// <param name="uiBytes">          [in] The size of the allocation in bytes. </param>
/// <param name="bPinned">          [in] true if the allocation is pinned (page-locked).
///                                 NOTE(review): presumably such allocations are also
///                                 recorded in the page-locked state -- confirm. </param>
///-------------------------------------------------------------------------------------------------
void
RecordAllocation(
__in void * pMemoryExtent,
__in size_t uiBytes,
__in BOOL bPinned
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Record a memory deallocation. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="pMemoryExtent">    [in] If non-null, the memory extent being freed. </param>
///-------------------------------------------------------------------------------------------------
void
RecordDeallocation(
__in void * pMemoryExtent
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Dumps the allocation statistics. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="ios">  [in,out] The stream to which the statistics are written. </param>
///-------------------------------------------------------------------------------------------------
void Report(
std::ostream &ios
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets global memory state. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <returns>   null if it fails, else the global memory state. </returns>
///-------------------------------------------------------------------------------------------------
MEMSTATEDESC *
GetGlobalMemoryState(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets page-locked memory state. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <returns>   null if it fails, else the page-locked memory state. </returns>
///-------------------------------------------------------------------------------------------------
MEMSTATEDESC *
GetPageLockedMemoryState(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Updates the memory space size to the value given by uiBytes. </summary>
///
/// <remarks>   Crossbac, 3/15/2013. </remarks>
///
/// <param name="uiBytes">  The memory space size in bytes. </param>
///-------------------------------------------------------------------------------------------------
void
UpdateMemorySpaceSize(
unsigned __int64 uiBytes
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Return the percentage of this memory space that is allocated. </summary>
///
/// <remarks>   crossbac, 9/10/2013. </remarks>
///
/// <returns>   The allocated percent. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetAllocatedPercent(
void
);
protected:
/// <summary>   Status of global memory. </summary>
MEMSTATEDESC m_global;
/// <summary>   Status of page-locked memory. </summary>
MEMSTATEDESC m_pagelocked;
/// <summary>   The lock for this object. </summary>
CRITICAL_SECTION m_lock;
} DEVICEMEMORYSTATE;
};
#endif // __DEVICE_MEMORY_STATUS_H__

Просмотреть файл

@ -1,299 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: GeometryEstimator.h
//
// summary: Declares the geometry estimator class
///-------------------------------------------------------------------------------------------------
#ifndef __GEOMETRY_ESTIMATOR_H__
#define __GEOMETRY_ESTIMATOR_H__
#include "PTaskRuntime.h"
#include <map>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary>   Argument descriptor: provides the peeked value of a datablock and the source port
///             from which it was peeked.
///             </summary>
///
/// <remarks>   crossbac, 5/1/2012. </remarks>
///-------------------------------------------------------------------------------------------------
typedef struct _ptask_arg_t {
/// <summary>   The peeked datablock. </summary>
Datablock * pBlock;
/// <summary>   The source port from which the block was peeked. </summary>
Port * pSourcePort;
/// <summary>   An allocator port.
///             NOTE(review): presumably the port that allocates the block -- confirm. </summary>
Port * pAllocator;
/// <summary>   A datablock template.
///             NOTE(review): presumably the template of the port -- confirm. </summary>
DatablockTemplate * pPortTemplate;
} PTASKARGDESC, *PPTASKARGDESC;
// Default geometry-estimator parameters.
// NOTE(review): presumably the default values for the basic group size and
// elements-per-thread arguments taken by estimator functions (see the
// nBasicGroupSizeX/Y/Z and nElementsPerThread parameters of LPFNGEOMETRYESTIMATOR);
// confirm against the estimator implementation.
/// <summary>   Default basic group size. </summary>
static const int PTGE_DEFAULT_BASIC_GROUP = 256;
/// <summary>   Default basic group size in the X dimension. </summary>
static const int PTGE_DEFAULT_BASIC_GROUP_X = 32;
/// <summary>   Default basic group size in the Y dimension. </summary>
static const int PTGE_DEFAULT_BASIC_GROUP_Y = 32;
/// <summary>   Default basic group size in the Z dimension. </summary>
static const int PTGE_DEFAULT_BASIC_GROUP_Z = 1;
/// <summary>   Default number of elements per thread. </summary>
static const int PTGE_DEFAULT_ELEMENTS_PER_THREAD = 1;
///-------------------------------------------------------------------------------------------------
/// <summary>   Geometry estimator callback function prototype. Allows the user to provide a
///             custom estimator function.
///             </summary>
///
/// <param name="nArguments">           The number of entries in ppArguments. </param>
/// <param name="ppArguments">          Array of pointers to argument descriptors. </param>
/// <param name="pBlockDims">           The block dimensions.
///                                     NOTE(review): presumably an output written by the
///                                     callback -- confirm. </param>
/// <param name="pGridDims">            The grid dimensions.
///                                     NOTE(review): presumably an output written by the
///                                     callback -- confirm. </param>
/// <param name="nElementsPerThread">   The number of elements per thread. </param>
/// <param name="nBasicGroupSizeX">     The basic group size in X. </param>
/// <param name="nBasicGroupSizeY">     The basic group size in Y. </param>
/// <param name="nBasicGroupSizeZ">     The basic group size in Z. </param>
///-------------------------------------------------------------------------------------------------
typedef void
(__stdcall *LPFNGEOMETRYESTIMATOR)(
UINT nArguments,
PTASKARGDESC ** ppArguments,
PTASKDIM3 * pBlockDims,
PTASKDIM3 * pGridDims,
int nElementsPerThread,
int nBasicGroupSizeX,
int nBasicGroupSizeY,
int nBasicGroupSizeZ
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Values that represent canonical estimator functions. Most estimators are so
///             common that it makes no sense to force the user to code them explicitly. These
///             values provide a library of common estimators. The numeric values are assigned
///             explicitly (0 through 5).
/// </summary>
///
/// <remarks>   crossbac, 5/1/2012. </remarks>
///-------------------------------------------------------------------------------------------------
typedef enum _estimator_fns {
/// <summary> No size estimator function has been provided.
/// </summary>
NO_SIZE_ESTIMATOR = 0,
/// <summary> Estimate the geometry based on the size of the
///           datablock bound to the first port.
/// </summary>
BASIC_INPUT_SIZE_ESTIMATOR = 1, // see GeometryEstimator::BasicInputSizeGeometryEstimator (documented as legacy)
/// <summary> Estimate the geometry based on the max of the
///           record counts over all input datablocks.
/// </summary>
MAX_INPUT_SIZE_ESTIMATOR = 2,
/// <summary> Estimate the geometry based on the max of the
///           record counts over all output datablocks.
/// </summary>
MAX_OUTPUT_SIZE_ESTIMATOR = 3,
/// <summary> Ports are bound to a particular dimension
///           of the iteration space. This estimator
///           looks for explicit port bindings and assembles
///           the iteration space accordingly. </summary>
EXPLICIT_DIMENSION_ESTIMATOR = 4,
/// <summary> The user commits to provide a callback to
///           estimate the dispatch dimensions
///           (presumably an LPFNGEOMETRYESTIMATOR -- confirm).
/// </summary>
USER_DEFINED_ESTIMATOR = 5
// ....
} GEOMETRYESTIMATORTYPE;
///-------------------------------------------------------------------------------------------------
/// <summary>   Geometry estimator. Functions for estimating dispatch dimensions based on
///             dynamically available information. The class declares no data members; every
///             member function is static.
/// </summary>
///
/// <remarks>   crossbac, 5/1/2012. </remarks>
///-------------------------------------------------------------------------------------------------
class GeometryEstimator {
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Basic input size geometry estimator. Accepts as input all the datablocks that
///             will be bound to inputs for a given task, but examines only the block bound to
///             parameter 0. This is a legacy function: use with caution.
/// </summary>
///
/// <remarks>   crossbac, 12/20/2011. </remarks>
///
/// <param name="nArguments">           The number of arguments. </param>
/// <param name="ppArguments">          [in] non-null, array of pointers to argument descriptors
///                                     for the input datablocks. </param>
/// <param name="pBlockDims">           [out] non-null, the thread block dimensions. </param>
/// <param name="pGridDims">            [out] non-null, the grid dimensions. </param>
/// <param name="nElementsPerThread">   The elements assumed by kernel code to be assigned to
///                                     each thread. No default argument is declared here; see
///                                     PTGE_DEFAULT_ELEMENTS_PER_THREAD. </param>
/// <param name="nBasicGroupSizeX">     The basic group size x coordinate. No default argument
///                                     is declared here; see PTGE_DEFAULT_BASIC_GROUP_X. </param>
/// <param name="nBasicGroupSizeY">     The basic group size y coordinate. </param>
/// <param name="nBasicGroupSizeZ">     The basic group size z coordinate. </param>
///-------------------------------------------------------------------------------------------------
static void
BasicInputSizeGeometryEstimator(
__in UINT nArguments,
__in PTask::PTASKARGDESC ** ppArguments,
__out PTask::PTASKDIM3 * pBlockDims,
__out PTask::PTASKDIM3 * pGridDims,
__in int nElementsPerThread,
__in int nBasicGroupSizeX,
__in int nBasicGroupSizeY,
__in int nBasicGroupSizeZ
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Max input size geometry estimator. Accepts as input all the datablocks that will
///             be bound to inputs for a given task, and takes the max over all the record counts
///             to find the conservative maximum number of thread blocks that will be required to
///             ensure each input element is processed.
/// </summary>
///
/// <remarks>   crossbac, 12/20/2011. </remarks>
///
/// <param name="nArguments">           The number of arguments. </param>
/// <param name="ppArguments">          [in] non-null, array of pointers to argument descriptors
///                                     for the input datablocks. </param>
/// <param name="pBlockDims">           [out] non-null, the thread block dimensions. </param>
/// <param name="pGridDims">            [out] non-null, the grid dimensions. </param>
/// <param name="nElementsPerThread">   The elements assumed by kernel code to be assigned to
///                                     each thread. No default argument is declared here; see
///                                     PTGE_DEFAULT_ELEMENTS_PER_THREAD. </param>
/// <param name="nBasicGroupSizeX">     The basic group size x coordinate. No default argument
///                                     is declared here; see PTGE_DEFAULT_BASIC_GROUP_X. </param>
/// <param name="nBasicGroupSizeY">     The basic group size y coordinate. </param>
/// <param name="nBasicGroupSizeZ">     The basic group size z coordinate. </param>
///-------------------------------------------------------------------------------------------------
static void
MaxInputSizeGeometryEstimator(
__in UINT nArguments,
__in PTask::PTASKARGDESC ** ppArguments,
__out PTask::PTASKDIM3 * pBlockDims,
__out PTask::PTASKDIM3 * pGridDims,
__in int nElementsPerThread,
__in int nBasicGroupSizeX,
__in int nBasicGroupSizeY,
__in int nBasicGroupSizeZ
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Max output size geometry estimator. Accepts as input all the datablocks that will
///             be bound to outputs for a given task, and takes the max over all the record
///             counts to find the conservative maximum number of thread blocks that will be
///             required to ensure each element is processed. Note that this is a somewhat
///             more subtle task than examining input blocks because output blocks with MetaPorts
///             serving as input allocator will not be allocated yet.
/// </summary>
///
/// <remarks>   crossbac, 12/20/2011. </remarks>
///
/// <param name="nArguments">           The number of arguments. </param>
/// <param name="ppArguments">          [in] non-null, array of pointers to argument descriptors
///                                     for the datablocks bound to the task's outputs. </param>
/// <param name="pBlockDims">           [out] non-null, the thread block dimensions. </param>
/// <param name="pGridDims">            [out] non-null, the grid dimensions. </param>
/// <param name="nElementsPerThread">   The elements assumed by kernel code to be assigned to
///                                     each thread. No default argument is declared here; see
///                                     PTGE_DEFAULT_ELEMENTS_PER_THREAD. </param>
/// <param name="nBasicGroupSizeX">     The basic group size x coordinate. No default argument
///                                     is declared here; see PTGE_DEFAULT_BASIC_GROUP_X. </param>
/// <param name="nBasicGroupSizeY">     The basic group size y coordinate. </param>
/// <param name="nBasicGroupSizeZ">     The basic group size z coordinate. </param>
///-------------------------------------------------------------------------------------------------
static void
MaxOutputSizeGeometryEstimator(
__in UINT nArguments,
__in PTask::PTASKARGDESC ** ppArguments,
__out PTask::PTASKDIM3 * pBlockDims,
__out PTask::PTASKDIM3 * pGridDims,
__in int nElementsPerThread,
__in int nBasicGroupSizeX,
__in int nBasicGroupSizeY,
__in int nBasicGroupSizeZ
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Ports are bound to dimensions of the iteration space such that the datablock size
///             maps directly to one dimension of space. Accept all port/block pairs and use
///             those with an explicit binding to assemble the iteration space.
/// </summary>
///
/// <remarks>   crossbac, 12/20/2011. </remarks>
///
/// <param name="nArguments">           The number of arguments. </param>
/// <param name="ppArguments">          [in] non-null, array of pointers to argument descriptors
///                                     (port/block pairs). </param>
/// <param name="pBlockDims">           [out] non-null, the thread block dimensions. </param>
/// <param name="pGridDims">            [out] non-null, the grid dimensions. </param>
/// <param name="nElementsPerThread">   The elements assumed by kernel code to be assigned to
///                                     each thread. No default argument is declared here; see
///                                     PTGE_DEFAULT_ELEMENTS_PER_THREAD. </param>
/// <param name="nBasicGroupSizeX">     The basic group size x coordinate. No default argument
///                                     is declared here; see PTGE_DEFAULT_BASIC_GROUP_X. </param>
/// <param name="nBasicGroupSizeY">     The basic group size y coordinate. No default argument
///                                     is declared here; see PTGE_DEFAULT_BASIC_GROUP_Y. </param>
/// <param name="nBasicGroupSizeZ">     The basic group size z coordinate. No default argument
///                                     is declared here; see PTGE_DEFAULT_BASIC_GROUP_Z. </param>
///-------------------------------------------------------------------------------------------------
static void
ExplicitDimensionEstimator(
UINT nArguments,
PTask::PTASKARGDESC ** ppArguments,
PTask::PTASKDIM3 * pBlockDims,
PTask::PTASKDIM3 * pGridDims,
int nElementsPerThread,
int nBasicGroupSizeX,
int nBasicGroupSizeY,
int nBasicGroupSizeZ
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Adds peeked blocks from all the ports in the given map to the argument list.
///             Helps assemble the argument list input for an estimator.
/// </summary>
///
/// <remarks>   crossbac, 5/1/2012. </remarks>
///
/// <param name="pPortMap">     [in] If non-null, the port map (keyed by UINT, mapping to
///                             Port pointers). </param>
/// <param name="ppArgs">       [in,out] If non-null, the argument list being assembled. </param>
/// <param name="nPortIndex">   [in,out] Zero-based index, passed by reference.
///                             NOTE(review): presumably the next slot to fill in ppArgs,
///                             advanced as entries are added -- confirm. </param>
/// <param name="nMaxToAdd">    (optional) the maximum number of ports to add. -1 (the
///                             default) means unbounded. </param>
///-------------------------------------------------------------------------------------------------
static void
AddToArgumentList(
std::map<UINT, Port*>* pPortMap,
PTASKARGDESC ** ppArgs,
int &nPortIndex,
int nMaxToAdd=-1
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Adds peeked blocks from all the ports for all relevant port maps to the argument
///             list. Helps assemble the argument list input for an estimator.
/// </summary>
///
/// <remarks>   crossbac, 5/1/2012. </remarks>
///
/// <param name="pTask">    [in] If non-null, the task whose ports supply the arguments. </param>
/// <param name="pppArgs">  [in,out] If non-null, receives the argument list.
///                         NOTE(review): who allocates and who frees the list is not
///                         visible in this header -- confirm. </param>
///
/// <returns>   the number of arguments in the given list. </returns>
///-------------------------------------------------------------------------------------------------
static int
CreateEstimatorArgumentList(
Task * pTask,
PTASKARGDESC *** pppArgs
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Estimate task geometry for a cuda task. This implementation is
///             platform specific because the interface for specifying launch dimensions
///             is specific to cuda.
/// </summary>
///
/// <remarks>   crossbac, 5/1/2012. </remarks>
///
/// <param name="pTask">    [in,out] If non-null, the task. </param>
///-------------------------------------------------------------------------------------------------
static void
EstimateCUTaskGeometry(
Task * pTask
);
};
}
#endif // __GEOMETRY_ESTIMATOR_H__

Просмотреть файл

@ -1,423 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: GlobalBlockPool.h
//
// summary: Declares the global block pool class
///-------------------------------------------------------------------------------------------------
#ifndef __GLOBAL_BLOCK_POOL_H__
#define __GLOBAL_BLOCK_POOL_H__
#include <stdio.h>
#include <crtdbg.h>
#include <Windows.h>
#include "datablock.h"
#include "BlockPoolOwner.h"
#include "BlockPool.h"
#include <deque>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary>   Global block pool: a BlockPoolOwner (and Lockable) that wraps a BlockPool and
///             records the accelerator class, permissions, template and channel sizes it was
///             created with. All pool operations are declared virtual.
///             NOTE(review): std::vector appears in the AllocateBlockPool and
///             AllocateBlockPoolAsync signatures but this header includes deque, not vector,
///             directly; presumably vector arrives via one of the project headers -- confirm.
/// </summary>
///-------------------------------------------------------------------------------------------------
class GlobalBlockPool : public BlockPoolOwner, public Lockable {
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Constructor (template-based). </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///
/// <param name="pDatablockTemplate">   [in] If non-null, the datablock template. </param>
/// <param name="eAcceleratorClass">    The accelerator class. </param>
/// <param name="ePermissions">         The buffer access flags (permissions). </param>
///-------------------------------------------------------------------------------------------------
GlobalBlockPool(
__in DatablockTemplate * pDatablockTemplate,
__in ACCELERATOR_CLASS eAcceleratorClass,
__in BUFFERACCESSFLAGS ePermissions
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Constructor (size-based; takes explicit channel sizes instead of a template). </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///
/// <param name="nDataBytes">           The data in bytes. </param>
/// <param name="nMetaBytes">           The meta in bytes. </param>
/// <param name="nTemplateBytes">       The template in bytes. </param>
/// <param name="eAcceleratorClass">    The accelerator class. </param>
/// <param name="ePermissions">         The buffer access flags (permissions). </param>
///-------------------------------------------------------------------------------------------------
GlobalBlockPool(
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in ACCELERATOR_CLASS eAcceleratorClass,
__in BUFFERACCESSFLAGS ePermissions
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destructor. </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~GlobalBlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary>   Query if this object has block pool. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   true if block pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasBlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary>   Query if this object is global pool. </summary>
///
/// <remarks>   crossbac, 8/30/2013. </remarks>
///
/// <returns>   true if global pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL BlockPoolIsGlobal();
///-------------------------------------------------------------------------------------------------
/// <summary>   Force block pooling for a port that has an up-stream allocator. In general, when
///             we have an upstream allocator (meta) port, the runtime will not create a block
///             pool for the corresponding output port. This turns out to put device-side
///             allocation on the critical path in some cases, so we provide a way to override
///             that behavior and allow a port to create a pool based on some size hints. When
///             there is a block available with sufficient space in the pool, the meta port can
///             avoid the allocation and draw from the pool.
///             NOTE(review): this text describes port-owned pools; how the hint applies to a
///             global pool is not visible in this header -- confirm.
/// </summary>
///
/// <remarks>   Crossbac, 9/25/2012. </remarks>
///
/// <param name="nPoolSize">                Size of the block pool. </param>
/// <param name="nStride">                  The stride. </param>
/// <param name="nDataBytes">               The data in bytes. </param>
/// <param name="nMetaBytes">               The meta in bytes. </param>
/// <param name="nTemplateBytes">           The template in bytes. </param>
/// <param name="bPageLockHostViews">       (optional) the page lock host views. Defaults to
///                                         FALSE. </param>
/// <param name="bEagerDeviceMaterialize">  (optional) the eager device materialize. Defaults
///                                         to FALSE. </param>
///-------------------------------------------------------------------------------------------------
virtual void
ForceBlockPoolHint(
__in UINT nPoolSize,
__in UINT nStride,
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in BOOL bPageLockHostViews=FALSE,
__in BOOL bEagerDeviceMaterialize=FALSE
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate block pool. Attempt to preallocate blocks on this port.
///
///             Allocation of data-blocks and platform-specific buffers can be a significant
///             latency expense at dispatch time. We can actually preallocate output datablocks
///             and create device-side buffers at graph construction time. For each node in the
///             graph, allocate data blocks on any output ports, and create device-specific
///             buffers for all accelerators capable of executing the node.
/// </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators">    [in] If non-null, the accelerators on which views of blocks
///                                 allocated in the pool may be required. </param>
/// <param name="uiPoolSize">       [in] (optional) Size of the pool. If zero/defaulted,
///                                 Runtime::GetICBlockPoolSize() will be used to determine the
///                                 size of the pool. </param>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPool(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Configure a block pool, but do not perform allocations on it yet.
/// </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <param name="uiPoolSize">       [in] (optional) Size of the pool. If zero/defaulted,
///                                 Runtime::GetICBlockPoolSize() will be used to determine the
///                                 size of the pool. </param>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
ConfigureBlockPool(
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destroys the block pool. AddRef everything in the pool, set its owner
///             to null, and then release it. </summary>
///
/// <remarks>   crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void
DestroyBlockPool(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Queries if a block pool is active and able to deliver/return blocks. </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///
/// <returns>   true if a block pool is active, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
IsBlockPoolActive(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the owner name. </summary>
///
/// <remarks>   crossbac, 6/18/2013. </remarks>
///
/// <returns>   null if it fails, else the owner name.
///             NOTE(review): lifetime/ownership of the returned buffer is not visible in this
///             header -- confirm before freeing or caching it. </returns>
///-------------------------------------------------------------------------------------------------
virtual char *
GetPoolOwnerName(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets high water mark. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The high water mark. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetHighWaterMark();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the total number of blocks owned by the pool. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The total number of blocks owned by the pool (whether they are queued or not). </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetOwnedBlockCount();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the low water mark. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The low water mark. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetLowWaterMark();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the currently available count. </summary>
///
/// <remarks>   crossbac, 6/19/2013. </remarks>
///
/// <returns>   The number of blocks currently available. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetAvailableBlockCount();
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate block pool. Attempt to preallocate blocks on this port.
///             Asynchronous version. Only allocates device-space buffers
///             in the first pass. Second pass queues all the copies.
///             This function handles only the first pass.
/// </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators">    [in] If non-null, the accelerators on which views of blocks
///                                 allocated in the pool may be required. </param>
/// <param name="uiPoolSize">       [in] (optional) Size of the pool. If zero/defaulted,
///                                 Runtime::GetICBlockPoolSize() will be used to determine the
///                                 size of the pool. </param>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPoolAsync(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate block pool. Attempt to preallocate blocks on this port.
///             Asynchronous version. Only allocates device-space buffers
///             in the first pass. Second pass queues all the copies.
///             This function handles the second pass. It takes no parameters.
/// </summary>
///
/// <remarks>   crossbac, 6/15/2012. </remarks>
///
/// <returns>   True if it succeeds, false if it fails. If a port type doesn't actually implement
///             pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
FinalizeBlockPoolAsync(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary>   add a new block to the pool. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock">   [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddNewBlock(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary>   return a block to the pool. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock">   [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void ReturnToPool(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets pool size. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   The pool size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary>   Sets the "requests page locked" flag.
///             NOTE(review): presumably controls whether blocks in the pool request
///             page-locked host buffers -- confirm against the implementation.
/// </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <param name="bPageLocked">  The new value of the flag. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetRequestsPageLocked(BOOL bPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the "requests page locked" flag. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   NOTE(review): presumably the current value of the flag set through
///             SetRequestsPageLocked, rather than a success/failure code -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL GetRequestsPageLocked();
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets pooled block. </summary>
///
/// <remarks>   crossbac, 4/29/2013. </remarks>
///
/// <returns>   null if it fails, else the pooled block. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * GetPooledBlock();
///-------------------------------------------------------------------------------------------------
/// <summary>   Query if this request matches what is present in this pool. </summary>
///
/// <remarks>   crossbac, 8/14/2013. </remarks>
///
/// <param name="nDataBytes">       The data in bytes. </param>
/// <param name="nMetaBytes">       The meta in bytes. </param>
/// <param name="nTemplateBytes">   The template in bytes. </param>
///
/// <returns>   true if matching request, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
IsMatchingRequest(
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets a destination buffer for a block with an upstream
///             allocator. Succeeds only if the pool happens to have blocks
///             backed by sufficient resources in all channels that are backed.
/// </summary>
///
/// <remarks>   Crossbac, 12/20/2011. </remarks>
///
/// <param name="pAccelerator">     (optional) [in] If non-null, the accelerator. Defaults to
///                                 NULL. </param>
/// <param name="uiDataBytes">      (optional) Defaults to 0. NOTE(review): presumably the
///                                 required data channel size in bytes -- confirm. </param>
/// <param name="uiMetaBytes">      (optional) Defaults to 0. NOTE(review): presumably the
///                                 required meta channel size in bytes -- confirm. </param>
/// <param name="uiTemplateBytes">  (optional) Defaults to 0. NOTE(review): presumably the
///                                 required template channel size in bytes -- confirm. </param>
///
/// <returns>   null if it fails, else the destination buffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock *
GetBlockFromPool(
__in Accelerator * pAccelerator=NULL,
__in UINT uiDataBytes=0,
__in UINT uiMetaBytes=0,
__in UINT uiTemplateBytes=0
);
protected:
/// <summary> The block pool. </summary>
BlockPool * m_pBlockPool;
/// <summary> true if this object has block pool. </summary>
BOOL m_bHasBlockPool;
/// <summary> The accelerator class. </summary>
ACCELERATOR_CLASS m_eAcceleratorClass;
/// <summary> The permissions. </summary>
BUFFERACCESSFLAGS m_ePermissions;
/// <summary> The template. </summary>
DatablockTemplate * m_pTemplate;
/// <summary> The data in bytes. </summary>
UINT m_nDataBytes;
/// <summary> The meta in bytes. </summary>
UINT m_nMetaBytes;
/// <summary> The template in bytes. </summary>
UINT m_nTemplateBytes;
};
};
#endif

Просмотреть файл

@ -1,257 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: GlobalPoolManager.h
//
// summary: Declares the global pool manager class
///-------------------------------------------------------------------------------------------------
#ifndef __GLOBAL_POOL_MANAGER__
#define __GLOBAL_POOL_MANAGER__
#include <stdio.h>
#include <crtdbg.h>
#include <Windows.h>
#include "datablock.h"
#include "GlobalBlockPool.h"
#include "ptlock.h"
#include <deque>
#include <map>
#include <tuple>
namespace PTask {
class CompiledKernel;
class Graph;
class Channel;
class Port;
class Task;
class Datablock;
class DatablockTemplate;
class GlobalPoolManager : public Lockable {
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Static factory for the pool manager.
///             NOTE(review): the constructor and destructor are protected, so instances
///             presumably come only from Create() and are torn down through Destroy(); whether
///             this yields a single process-wide instance is not visible here -- confirm.
/// </summary>
///
/// <returns>   A pointer to a GlobalPoolManager. NOTE(review): failure behavior (e.g. a null
///             return) is not visible in this header. </returns>
///-------------------------------------------------------------------------------------------------
static GlobalPoolManager * Create();
///-------------------------------------------------------------------------------------------------
/// <summary>   Static teardown counterpart of Create(). Takes no arguments.
///             NOTE(review): presumably destroys the manager instance produced by Create();
///             behavior when no instance exists is not visible in this header -- confirm.
/// </summary>
///-------------------------------------------------------------------------------------------------
static void Destroy();
///-------------------------------------------------------------------------------------------------
/// <summary>   Require block pool (overload taking both a template and explicit sizes).
///             NOTE(review): presumably records a requirement that is satisfied when pools
///             are allocated -- confirm against the implementation.
/// </summary>
///
/// <remarks>   crossbac, 8/20/2013. </remarks>
///
/// <param name="pTemplate">        [in] If non-null, the template. </param>
/// <param name="nDataSize">        Size of the data (presumably in bytes). </param>
/// <param name="nMetaSize">        Size of the meta (presumably in bytes). </param>
/// <param name="nTemplateSize">    Size of the template (presumably in bytes). </param>
/// <param name="nBlocks">          (Optional) The number of blocks. Defaults to 0.
///                                 NOTE(review): the meaning of 0 is not visible here. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
RequireBlockPool(
__in DatablockTemplate * pTemplate,
__in int nDataSize,
__in int nMetaSize,
__in int nTemplateSize,
__in int nBlocks=0
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Require block pool (overload taking explicit sizes only, no template).
///             NOTE(review): presumably records a requirement that is satisfied when pools
///             are allocated -- confirm against the implementation.
/// </summary>
///
/// <remarks>   crossbac, 8/20/2013. </remarks>
///
/// <param name="nDataSize">        Size of the data (presumably in bytes). </param>
/// <param name="nMetaSize">        Size of the meta (presumably in bytes). </param>
/// <param name="nTemplateSize">    Size of the template (presumably in bytes). </param>
/// <param name="nBlocks">          (Optional) The number of blocks. Defaults to 0.
///                                 NOTE(review): the meaning of 0 is not visible here. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
RequireBlockPool(
__in int nDataSize,
__in int nMetaSize,
__in int nTemplateSize,
__in int nBlocks=0
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Require block pool (overload taking a template only, no explicit sizes).
///             NOTE(review): presumably records a requirement that is satisfied when pools
///             are allocated -- confirm against the implementation.
/// </summary>
///
/// <remarks>   crossbac, 8/20/2013. </remarks>
///
/// <param name="pTemplate">    [in] If non-null, the template. </param>
/// <param name="nBlocks">      (Optional) The number of blocks. Defaults to 0.
///                             NOTE(review): the meaning of 0 is not visible here. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
RequireBlockPool(
__in DatablockTemplate * pTemplate,
__in int nBlocks=0
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Find a block pool for the block. If there is no good fit,
///             create one if the bCreateIfNotFound flag is set.
/// </summary>
///
/// <remarks>   crossbac, 8/30/2013. </remarks>
///
/// <param name="pBlock">               [in] If non-null, the block. </param>
/// <param name="bCreateIfNotFound">    If set, create a pool when no good fit exists. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
AddBlockToBestFitPool(
__in Datablock * pBlock,
__in BOOL bCreateIfNotFound
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Determines if we can allocate pools.
///             NOTE(review): the name suggests this performs the allocation rather than only
///             checking feasibility; the declaration alone cannot settle which -- confirm
///             against the implementation and correct this summary.
/// </summary>
///
/// <remarks>   crossbac, 8/20/2013. </remarks>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL AllocatePools();
///-------------------------------------------------------------------------------------------------
/// <summary>   Destroys the pools. Non-static: operates on this manager instance. </summary>
///
/// <remarks>   crossbac, 8/20/2013. </remarks>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL DestroyPools();
///-------------------------------------------------------------------------------------------------
/// <summary>   Allocate datablock. Non-static: operates on this manager instance. </summary>
///
/// <remarks>   crossbac, 8/20/2013. </remarks>
///
/// <param name="pTemplate">        [in] If non-null, the template. </param>
/// <param name="uiDataSize">       Size of the data (presumably in bytes). </param>
/// <param name="uiMetaSize">       Size of the meta (presumably in bytes). </param>
/// <param name="uiTemplateSize">   Size of the template (presumably in bytes). </param>
///
/// <returns>   null if it fails, else the allocated datablock. </returns>
///-------------------------------------------------------------------------------------------------
Datablock *
AllocateDatablock(
__in DatablockTemplate * pTemplate,
__in UINT uiDataSize,
__in UINT uiMetaSize,
__in UINT uiTemplateSize
);
///-------------------------------------------------------------------------------------------------
/// <summary> Request a pooled block. Static entry point.
/// NOTE(review): whether a miss (no matching pool, or an empty one)
/// returns null or falls back to a fresh allocation is not visible
/// from this declaration -- confirm in the implementation. </summary>
///
/// <remarks> crossbac, 8/21/2013. </remarks>
///
/// <param name="pTemplate"> [in] If non-null, the template. </param>
/// <param name="uiDataSize"> Size of the data. </param>
/// <param name="uiMetaSize"> Size of the meta. </param>
/// <param name="uiTemplateSize"> Size of the template. </param>
///
/// <returns> null if it fails, else the requested block. </returns>
///-------------------------------------------------------------------------------------------------
static Datablock *
RequestBlock(
__in DatablockTemplate * pTemplate,
__in UINT uiDataSize,
__in UINT uiMetaSize,
__in UINT uiTemplateSize
);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. Declared in the protected section, so
/// outside code cannot construct a manager directly. </summary>
///
/// <remarks> crossbac, 8/21/2013. </remarks>
///-------------------------------------------------------------------------------------------------
GlobalPoolManager();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Virtual and protected, so outside code cannot
/// delete a manager directly. </summary>
///
/// <remarks> crossbac, 8/21/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~GlobalPoolManager();
///-------------------------------------------------------------------------------------------------
/// <summary> Searches for the first matching pool. </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <param name="pTemplate"> [in] If non-null, the template. </param>
/// <param name="uiDataSize"> Size of the data. </param>
/// <param name="uiMetaSize"> Size of the meta. </param>
/// <param name="uiTemplateSize"> Size of the template. </param>
///
/// <returns> null if it fails, else the found matching pool. </returns>
///-------------------------------------------------------------------------------------------------
GlobalBlockPool *
FindMatchingPool(
__in DatablockTemplate * pTemplate,
__in UINT uiDataSize,
__in UINT uiMetaSize,
__in UINT uiTemplateSize
);
///-------------------------------------------------------------------------------------------------
/// <summary> Find a block pool for the block. If there is no good fit,
/// create one if the bCreateIfNotFound flag is set.
/// Protected instance-level counterpart of the public static
/// AddBlockToBestFitPool, which takes the same parameters.
/// </summary>
///
/// <remarks> crossbac, 8/30/2013. </remarks>
///
/// <param name="pBlock"> [in] The block to place in a pool. </param>
/// <param name="bCreateIfNotFound"> TRUE to create a new pool when no existing pool
/// is a good fit. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
__AddBlockToBestFitPool(
__in Datablock * pBlock,
__in BOOL bCreateIfNotFound
);
/// <summary> NOTE(review): judging by the name, emits a warning when called after the
/// pools have been initialized; lspzFunction is presumably the name of the
/// calling function used in the message -- confirm in the implementation. </summary>
static void WarnIfInitialized(char * lspzFunction);
/// <summary> Describes one pool: a template pointer plus four ints.
/// NOTE(review): the meaning of each int field is not documented here --
/// confirm against the code that builds these tuples. </summary>
typedef std::tuple<DatablockTemplate*, int, int, int, int> POOLDESCRIPTOR;
/// <summary> The global pool manager instance (static). </summary>
static GlobalPoolManager * g_pGlobalPoolManager;
/// <summary> Static flag returned by ArePoolsAllocated(). </summary>
static BOOL g_bPoolsAllocated;
/// <summary> Static lock whose address is returned by GetPoolLock(). </summary>
static PTLock g_vPoolsLock;
/// <summary> Static map of pool descriptors for untyped pools, keyed by int.
/// NOTE(review): the key is presumably a size -- confirm. </summary>
static std::map<int, POOLDESCRIPTOR> g_vRequiredPoolsUntyped;
/// <summary> Static map of pool descriptors for typed pools, keyed by template. </summary>
static std::map<DatablockTemplate*, POOLDESCRIPTOR> g_vRequiredPoolsTyped;
/// <summary> Untyped block pools, keyed by int. Despite the g_ prefix this is a
/// per-instance (non-static) member. </summary>
std::map<int, GlobalBlockPool*> g_vUntypedBlockPools;
/// <summary> Typed block pools, keyed by template. Despite the g_ prefix this is a
/// per-instance (non-static) member. </summary>
std::map<DatablockTemplate*, GlobalBlockPool*> g_vTypedBlockPools;
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the pool manager. Virtual accessor for the static
/// g_pGlobalPoolManager member. </summary>
///
/// <returns> The current value of g_pGlobalPoolManager. </returns>
///-------------------------------------------------------------------------------------------------
virtual GlobalPoolManager *
GetPoolManager()
{
    return g_pGlobalPoolManager;
}
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether the pools are allocated. Virtual accessor for
/// the static g_bPoolsAllocated flag. </summary>
///
/// <returns> The current value of g_bPoolsAllocated. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
ArePoolsAllocated()
{
    return g_bPoolsAllocated;
}
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the pool lock. Virtual accessor for the static
/// g_vPoolsLock member. </summary>
///
/// <returns> The address of g_vPoolsLock (never null). </returns>
///-------------------------------------------------------------------------------------------------
virtual PTLock *
GetPoolLock()
{
    return &g_vPoolsLock;
}
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the map of required untyped pools. Virtual accessor for the
/// static g_vRequiredPoolsUntyped member. </summary>
///
/// <returns> The address of g_vRequiredPoolsUntyped (never null). </returns>
///-------------------------------------------------------------------------------------------------
virtual std::map<int, POOLDESCRIPTOR>*
GetRequiredPoolsUntyped()
{
    return &g_vRequiredPoolsUntyped;
}
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the map of required typed pools. Virtual accessor for the
/// static g_vRequiredPoolsTyped member. </summary>
///
/// <returns> The address of g_vRequiredPoolsTyped (never null). </returns>
///-------------------------------------------------------------------------------------------------
virtual std::map<DatablockTemplate*, POOLDESCRIPTOR>*
GetRequiredPoolsTyped()
{
    return &g_vRequiredPoolsTyped;
}
};
};
#endif

Просмотреть файл

@ -1,160 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: GraphProfiler.h
//
// summary: Declares the graph profiler class
///-------------------------------------------------------------------------------------------------
#ifndef __GRAPH_PROFILER_H__
#define __GRAPH_PROFILER_H__
#include "primitive_types.h"
#include <string>
#include <vector>
#include <map>
#include <set>
#include <sstream>
#include <iostream>
namespace PTask {
class Task;
class Graph;
///-------------------------------------------------------------------------------------------------
/// <summary> Graph profiler. Holds ad hoc per-graph statistics counters (thread
/// states, dispatch and dequeue attempts, task queue occupancy) and the
/// event hooks that update them. Only the constructor and destructor are
/// public; everything else is protected and reachable by Graph through
/// the friend declaration at the end of the class. </summary>
///
/// <remarks> Crossbac, 7/19/2013. </remarks>
///-------------------------------------------------------------------------------------------------
class GraphProfiler
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 7/19/2013. </remarks>
///
/// <param name="pGraph"> [in,out] If non-null, the graph. </param>
///-------------------------------------------------------------------------------------------------
GraphProfiler(Graph * pGraph);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 7/19/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~GraphProfiler();
protected:
/// <summary> The graph being profiled (presumably the constructor's pGraph
/// argument -- confirm in the implementation). </summary>
Graph * m_pGraph;
/// <summary> Lock for ad hoc graph stats. </summary>
CRITICAL_SECTION m_csGraphStats;
/// <summary> The minimum number of concurrent inflight task threads. </summary>
UINT m_uiMinConcurrentInflightThreads;
/// <summary> The maximum number of concurrent inflight task threads. </summary>
UINT m_uiMaxConcurrentInflightThreads;
/// <summary> The concurrent inflight thread accumulator. </summary>
UINT m_uiConcurrentInflightThreadAccumulator;
/// <summary> The minimum number of concurrent inflight dispatch attempts. </summary>
UINT m_uiMinConcurrentInflightDispatches;
/// <summary> The maximum number of concurrent inflight dispatch attempts. </summary>
UINT m_uiMaxConcurrentInflightDispatches;
/// <summary> The concurrent inflight dispatch accumulator. </summary>
UINT m_uiConcurrentInflightDispatchAccumulator;
/// <summary> The minimum task queue occupancy. </summary>
UINT m_uiMinTaskQueueOccupancy;
/// <summary> The maximum task queue occupancy. </summary>
UINT m_uiMaxTaskQueueOccupancy;
/// <summary> The task queue occupancy accumulator. </summary>
UINT m_uiTaskQueueOccupancyAccumulator;
/// <summary> The number of task queue samples. </summary>
UINT m_uiTaskQueueSamples;
/// <summary> The alive threads (presumably maintained by OnTaskThreadAlive --
/// confirm). </summary>
UINT m_uiAliveThreads;
/// <summary> The awake threads. </summary>
UINT m_uiAwakeThreads;
/// <summary> The threads blocked on the running-graph condition (presumably
/// maintained by OnTaskThreadBlockRunningGraph and
/// OnTaskThreadWakeRunningGraph -- confirm). </summary>
UINT m_uiBlockedRunningThreads;
/// <summary> The threads blocked on the tasks-available condition (presumably
/// maintained by OnTaskThreadBlockTasksAvailable and
/// OnTaskThreadWakeTasksAvailable -- confirm). </summary>
UINT m_uiBlockedTaskAvailableThreads;
/// <summary> The exited threads. </summary>
UINT m_uiExitedThreads;
/// <summary> The current number of inflight threads. </summary>
UINT m_uiInflightThreads;
/// <summary> The current number of inflight dispatches. </summary>
UINT m_uiInflightDispatchAttempts;
/// <summary> The number of updates to the inflight thread count. </summary>
UINT m_uiInflightThreadUpdates;
/// <summary> The number of updates to the inflight dispatch count. </summary>
UINT m_uiInflightDispatchUpdates;
/// <summary> The total number of dispatch attempts. </summary>
UINT m_uiDispatchAttempts;
/// <summary> The successful dispatch attempts. </summary>
UINT m_uiSuccessfulDispatchAttempts;
/// <summary> The total number of dequeue attempts. </summary>
UINT m_uiDequeueAttempts;
/// <summary> The successful dequeue attempts. </summary>
UINT m_uiSuccessfulDequeueAttempts;
///-------------------------------------------------------------------------------------------------
/// <summary> Initialises the graph statistics. </summary>
///
/// <remarks> crossbac, 7/1/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void Initialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys the graph statistics state (counterpart of Initialize).
/// NOTE(review): presumably releases m_csGraphStats -- confirm. </summary>
///
/// <remarks> crossbac, 7/1/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void Destroy();
///-------------------------------------------------------------------------------------------------
/// <summary> Print graph statistics. </summary>
///
/// <remarks> crossbac, 7/1/2013. </remarks>
///
/// <param name="ss"> [in,out] The stream the report is written to. </param>
///-------------------------------------------------------------------------------------------------
void Report(std::ostream& ss);
// Event hooks. NOTE(review): presumably called by Graph (a friend of this class) from
// its task threads to update the counters above; the exact counters touched by each
// hook are not visible in this header -- confirm in the implementation.
/// <summary> Hook: a task thread has come alive. </summary>
void OnTaskThreadAlive();
/// <summary> Hook: a task thread is exiting. </summary>
void OnTaskThreadExit();
/// <summary> Hook: a task thread is about to block on the running-graph condition. </summary>
void OnTaskThreadBlockRunningGraph();
/// <summary> Hook: a task thread woke from the running-graph condition. </summary>
void OnTaskThreadWakeRunningGraph();
/// <summary> Hook: a task thread is about to block on the tasks-available condition. </summary>
void OnTaskThreadBlockTasksAvailable();
/// <summary> Hook: a task thread woke from the tasks-available condition. </summary>
void OnTaskThreadWakeTasksAvailable();
/// <summary> Hook: a task thread is attempting a dequeue. </summary>
void OnTaskThreadDequeueAttempt();
/// <summary> Hook: a dequeue attempt completed. </summary>
/// <param name="pTask"> The dequeued task. NOTE(review): presumably null when the
/// attempt produced no task -- confirm. </param>
void OnTaskThreadDequeueComplete(Task * pTask);
/// <summary> Hook: a task thread is attempting a dispatch. </summary>
void OnTaskThreadDispatchAttempt();
/// <summary> Hook: a dispatch attempt completed. </summary>
/// <param name="bSuccess"> TRUE if the dispatch attempt succeeded. </param>
void OnTaskThreadDispatchComplete(BOOL bSuccess);
// Graph needs access to the protected state and hooks above.
friend class Graph;
};
};
#endif

Просмотреть файл

@ -1,194 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: HostAsyncContext.h
//
// summary: Declares the host asynchronous context class
///-------------------------------------------------------------------------------------------------
#ifndef __HOST_ASYNC_CONTEXT_H__
#define __HOST_ASYNC_CONTEXT_H__
#include "primitive_types.h"
#include "accelerator.h"
#include "hostaccelerator.h"
#include "task.h"
#include "channel.h"
#include <map>
#include <vector>
#include <list>
#include "hrperft.h"
#include "AsyncContext.h"
#include "AsyncDependence.h"
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Host asynchronous context. AsyncContext subclass that declares the
/// PlatformSpecific* virtual hooks for the host platform. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///-------------------------------------------------------------------------------------------------
class HostAsyncContext : public AsyncContext {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pDeviceContext"> [in] If non-null, context for the device. </param>
/// <param name="pTaskContext"> [in] If non-null, context for the task. </param>
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
///-------------------------------------------------------------------------------------------------
HostAsyncContext(
__in Accelerator * pDeviceContext,
__in Task * pTaskContext,
__in ASYNCCONTEXTTYPE eAsyncContextType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~HostAsyncContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes this object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Initialize();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific create synchronization point. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pPSSyncObject"> [in,out] If non-null, the platform-specific sync
/// object. </param>
///
/// <returns> null if it fails, else the new sync point. </returns>
///-------------------------------------------------------------------------------------------------
virtual SyncPoint *
PlatformSpecificCreateSyncPoint(
void * pPSSyncObject
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific destroy synchronization point. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pSyncPoint"> [in] If non-null, the synchronise point. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificDestroySynchronizationPoint(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific synchronize context. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronizeContext(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific insert fence for the given sync point.
/// NOTE(review): the original summary read "Wait for dependence
/// asynchronously"; confirm the intended semantics in the
/// implementation. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint"> [in] If non-null, the synchronise point. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificInsertFence(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence synchronously. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint"> [in] If non-null, the synchronise point to wait on. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificSynchronousWait(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Determines if the sync point is resolved (and marks it if so). </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
///
/// <returns> true if it succeeds, false if it fails. NOTE(review): given the
/// method name, the result may instead mean "still outstanding" --
/// confirm in the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificQueryOutstanding(
__inout SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform specific nonblocking check whether the event remains outstanding. </summary>
///
/// <remarks> crossbac, 7/2/2013. </remarks>
///
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
///
/// <returns> true if it succeeds, false if it fails. NOTE(review): given the
/// summary, the result may instead mean "still outstanding" --
/// confirm in the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificNonblockingQueryOutstanding(
__inout SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait for dependence synchronously without locking the async context
/// or underlying accelerator: this simplifies lock acquisition for such
/// waits, but at the expense of leaving live dependences that are
/// actually resolved. </summary>
///
/// <remarks> Crossbac, 5/24/2012. </remarks>
///
/// <param name="pSyncPoint"> [in] If non-null, the synchronise point to wait on. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
PlatformSpecificLocklessSynchronousWait(
__in SyncPoint * pSyncPoint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the platform context object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> null if it fails, else the platform context object. </returns>
///-------------------------------------------------------------------------------------------------
virtual void *
GetPlatformContextObject();
};
};
#endif

Просмотреть файл

@ -1,521 +0,0 @@
//--------------------------------------------------------------------------------------
// File: InitializerChannel.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _INITIALIZER_CHANNEL_H_
#define _INITIALIZER_CHANNEL_H_
#include "primitive_types.h"
#include "channel.h"
#include "BlockPoolOwner.h"
#include <deque>
namespace PTask {
class BlockPool;
class Datablock;
class DatablockTemplate;
///-------------------------------------------------------------------------------------------------
/// <summary> InitializerChannel. Channel subclass specialized to allocate data based
/// on downstream Port template when pulled. Push is meaningless. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class InitializerChannel : public Channel, public BlockPoolOwner {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pGraph"> [in] If non-null, the graph. </param>
/// <param name="pDatablockTemplate"> [in] If non-null, the datablock template. </param>
/// <param name="hRuntimeTerminateEvent"> Handle of the runtime terminate event. </param>
/// <param name="hGraphTeardownEvt"> Handle of the graph teardown event. </param>
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
/// <param name="lpszChannelName"> [in] If non-null, name of the channel. </param>
/// <param name="bHasBlockPool"> TRUE if the channel has a block pool. </param>
///-------------------------------------------------------------------------------------------------
InitializerChannel(
__in Graph * pGraph,
__in DatablockTemplate * pDatablockTemplate,
__in HANDLE hRuntimeTerminateEvent,
__in HANDLE hGraphTeardownEvt,
__in HANDLE hGraphStopEvent,
__in char * lpszChannelName,
__in BOOL bHasBlockPool
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Virtual: this class derives from both Channel and
/// BlockPoolOwner. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~InitializerChannel();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
/// streamed. Generally speaking, this is a property of the primitive whose IO
/// resources are being exposed by this port; consequently this property must be set
/// explicitly by the programmer when graph structures that are stateful are
/// constructed. For example, in a sort primitive, the main input can be streamed
/// (broken into multiple blocks) only if there is a merge network downstream of the
/// node performing the sort. Code that feeds the main input port needs to know this
/// to decide whether to grow blocks until all data is present, or to push partial
/// input.
/// </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> true if the channel can stream data, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CanStream();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if channel is ready. This has a different meaning depending on the channel
/// subtype in question, but in general means "is the channel ready to produce or
/// consume datablocks?".
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="type"> (optional) the channel endpoint to query; defaults to CE_DST. </param>
///
/// <returns> true if ready, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsReady(CHANNELENDPOINTTYPE type=CE_DST);
///-------------------------------------------------------------------------------------------------
/// <summary> Pulls a datablock from the channel, potentially timing out after dwTimeout
/// milliseconds. Per the class summary, an initializer channel allocates data
/// based on the downstream port template when pulled.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF (the
/// default) for no timeout. </param>
///
/// <returns> null if it fails, else the pulled datablock. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Pull(DWORD dwTimeout=0xFFFFFFFF);
///-------------------------------------------------------------------------------------------------
/// <summary> Returns the first available datablock on the channel without removing it.
/// NOTE(review): whether the returned block carries a reference for the
/// caller is not visible from this declaration -- confirm. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> null if it fails, else the currently available datablock object. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Peek();
///-------------------------------------------------------------------------------------------------
/// <summary> Pushes a datablock into this channel, blocking until there is capacity
/// for an optional timeout in milliseconds. Default timeout is infinite.
/// The class summary states that push is meaningless for an initializer
/// channel. NOTE(review): confirm what this override actually does and
/// returns.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF (the
/// default) for no timeout. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Push(Datablock* pBlock, DWORD dwTimeout=0xFFFFFFFF);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object is a block pool candidate. </summary>
///
/// <remarks> crossbac, 4/30/2013. </remarks>
///
/// <returns> true if this object is a block pool candidate, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsBlockPoolCandidate();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object is an accelerator-only block pool candidate. </summary>
///
/// <remarks> crossbac, 4/30/2013. </remarks>
///
/// <returns> true if accelerator-only block pool candidate, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsAcceleratorOnlyBlockPoolCandidate();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object is a page-locked block pool candidate. </summary>
///
/// <remarks> crossbac, 4/30/2013. </remarks>
///
/// <returns> true if page-locked block pool candidate, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsPagelockedBlockPoolCandidate();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object has a block pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if this object has a block pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasBlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if the block pool of this object is a global pool. </summary>
///
/// <remarks> crossbac, 8/30/2013. </remarks>
///
/// <returns> true if the block pool is global, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL BlockPoolIsGlobal();
///-------------------------------------------------------------------------------------------------
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
/// we have an upstream allocator (meta) port, the runtime will not create a block
/// pool for the corresponding output port. This turns out to put device-side
/// allocation on the critical path in some cases, so we provide a way to override
/// that behavior and allow a port to create a pool based on some size hints. When
/// there is a block available with sufficient space in the pool, the meta port can
/// avoid the allocation and draw from the pool.
/// </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///
/// <param name="nPoolSize"> Size of the block pool. </param>
/// <param name="nStride"> The stride. </param>
/// <param name="nDataBytes"> Size hint for the data, in bytes. </param>
/// <param name="nMetaBytes"> Size hint for the meta, in bytes. </param>
/// <param name="nTemplateBytes"> Size hint for the template, in bytes. </param>
/// <param name="bPageLockHostViews"> (optional) TRUE to page-lock host views; defaults
/// to FALSE. </param>
/// <param name="bEagerDeviceMaterialize"> (optional) TRUE to materialize device views
/// eagerly; defaults to FALSE. </param>
///-------------------------------------------------------------------------------------------------
virtual void
ForceBlockPoolHint(
__in UINT nPoolSize,
__in UINT nStride,
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in BOOL bPageLockHostViews=FALSE,
__in BOOL bEagerDeviceMaterialize=FALSE
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
///
/// Allocation of data-blocks and platform-specific buffers can be a significant
/// latency expense at dispatch time. We can actually preallocate output datablocks
/// and create device-side buffers at graph construction time. For each node in the
/// graph, allocate data blocks on any output ports, and create device-specific
/// buffers for all accelerators capable of executing the node.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPool(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
/// to null, and then release it. </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void
DestroyBlockPool(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries if the block pool is active, i.e. able to deliver and take back
/// blocks. </summary>
///
/// <remarks> crossbac, 6/18/2013. </remarks>
///
/// <returns> true if a block pool is active, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
IsBlockPoolActive(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the owner name. NOTE(review): for this class that is presumably
/// the channel name given to the constructor; ownership of the returned
/// string is not visible from this declaration -- confirm. </summary>
///
/// <remarks> crossbac, 6/18/2013. </remarks>
///
/// <returns> null if it fails, else the owner name. </returns>
///-------------------------------------------------------------------------------------------------
virtual char *
GetPoolOwnerName(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles only the first pass; the second pass is
/// handled by FinalizeBlockPoolAsync.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPoolAsync(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles the second pass; the first pass is
/// handled by AllocateBlockPoolAsync. Takes no parameters.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
FinalizeBlockPoolAsync(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the high water mark of the block pool. </summary>
///
/// <remarks> crossbac, 6/19/2013. </remarks>
///
/// <returns> The high water mark. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetHighWaterMark();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the total number of blocks owned by the pool. </summary>
///
/// <remarks> crossbac, 6/19/2013. </remarks>
///
/// <returns> The total number of blocks owned by the pool, whether or not they are
/// currently queued in it. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetOwnedBlockCount();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the low water mark of the block pool. </summary>
///
/// <remarks> crossbac, 6/19/2013. </remarks>
///
/// <returns> The low water mark. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetLowWaterMark();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the currently available block count. </summary>
///
/// <remarks> crossbac, 6/19/2013. </remarks>
///
/// <returns> The number of blocks currently available. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetAvailableBlockCount();
///-------------------------------------------------------------------------------------------------
/// <summary> Adds a new block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block to add. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddNewBlock(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Returns a block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block being returned. </param>
///-------------------------------------------------------------------------------------------------
virtual void ReturnToPool(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the pool size. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The pool size, as a UINT.
/// NOTE(review): the unit (presumably a number of blocks) is not stated in this
/// header; confirm against the definition. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the "requests page locked" flag. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="bPageLocked"> The new value of the flag.
/// NOTE(review): presumably TRUE asks for page-locked host
/// memory for pooled blocks -- confirm against the
/// implementation. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetRequestsPageLocked(BOOL bPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the "requests page locked" flag. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The flag value.
/// NOTE(review): presumably the value last given to SetRequestsPageLocked rather
/// than a success/failure code -- confirm against the definition. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL GetRequestsPageLocked();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this channel has downstream writers. An output channel is
/// considered a writer because we must conservatively assume consumed
/// blocks will be written.
/// </summary>
///
/// <remarks> crossbac, 8/15/2013. </remarks>
///
/// <returns> true if this channel has downstream writers, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasDownstreamWriters();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
/// for this channel in a way that is consistent with a well-formed graph. Called by
/// CheckSemantics().
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="pos"> [in,out] The output stream (std::ostream). </param>
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
PTask::Graph * pGraph);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate a datablock. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pAsyncContext"> [in,out] If non-null, the asynchronous context.
/// NOTE(review): its exact role in the allocation is not
/// described in this header; confirm. </param>
///
/// <returns> null if it fails, else the allocated datablock. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * AllocateBlock(AsyncContext * pAsyncContext);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if the block that is (or would be) produced on demand in response to
/// a pull call passes all/any predicates.
/// </summary>
///
/// <remarks> Crossbac, 2/1/2012. </remarks>
///
/// <param name="ppDemandAllocatedBlock"> [out] If non-null, on exit, the demand allocated
/// block if all predicates are passed. </param>
///
/// <returns> true if the predicates are passed, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PassesPredicates(Datablock ** ppDemandAllocatedBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a destination buffer for a block with an upstream
/// allocator. Succeeds only if the pool happens to have blocks
/// backed by sufficient resources in all channels that are backed.
/// </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="pAccelerator"> (optional) [in] If non-null, the accelerator. Defaults to NULL. </param>
/// <param name="uiDataBytes"> (optional) [in] Defaults to 0.
/// NOTE(review): presumably the number of bytes required in
/// the block's data channel -- confirm. </param>
/// <param name="uiMetaBytes"> (optional) [in] Defaults to 0.
/// NOTE(review): presumably the number of bytes required in
/// the block's meta channel -- confirm. </param>
/// <param name="uiTemplateBytes"> (optional) [in] Defaults to 0.
/// NOTE(review): presumably the number of bytes required in
/// the block's template channel -- confirm. </param>
///
/// <returns> null if it fails, else the destination buffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * 
GetBlockFromPool(
__in Accelerator * pAccelerator=NULL,
__in UINT uiDataBytes=0,
__in UINT uiMetaBytes=0,
__in UINT uiTemplateBytes=0
);
/// <summary> The datablock that is the source of the peeked control propagation signal. </summary>
Datablock * m_pPeekedControlPropagationSignalSrc;
/// <summary> true if a data block was peeked to derive a control propagation signal. </summary>
BOOL m_bControlBlockPeeked;
/// <summary> The code for the peeked control signal.
/// NOTE(review): presumably only meaningful while m_bControlBlockPeeked is true -- confirm. </summary>
CONTROLSIGNAL m_luiPeekedControlSignal;
/// <summary> Pointer to the block pool. Ownership is not visible from this header. </summary>
BlockPool * m_pBlockPool;
};
};
#endif

Просмотреть файл

@ -1,346 +0,0 @@
//--------------------------------------------------------------------------------------
// File: InitializerPort.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _INITIALIZER_PORT_H_
#define _INITIALIZER_PORT_H_
#include "primitive_types.h"
#include "InputPort.h"
namespace PTask {
class Task;
class Graph;
class Datablock;
class DatablockTemplate;
class AsyncContext;
///-------------------------------------------------------------------------------------------------
/// <summary>
/// Initializer port. An initializer port is always full, and when pulled, will return a new
/// datablock with the value derived from the datablock template with which this port was
/// created. When peeked, an initializer port always returns NULL.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class InitializerPort : public InputPort {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
InitializerPort();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~InitializerPort();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this port is occupied. Initializer ports are always occupied, by
/// definition.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if occupied, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsOccupied();
///-------------------------------------------------------------------------------------------------
/// <summary> Pulls on this port to read the next available datablock. Return a new datablock
/// initialized according to the DatablockTemplate with which this port was created.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> null if it fails, else the new datablock. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Pull();
///-------------------------------------------------------------------------------------------------
/// <summary> Peek at the next datablock on this port. Peek on an InitializerPort always
/// returns NULL, because datablocks are created on demand in response to a pull.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> Always NULL for this port type, per the summary above. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Peek();
///-------------------------------------------------------------------------------------------------
/// <summary> Pushes an object into this port. This is a NO-OP for InitializerPorts, since this
/// port type is designed only to be bound to input resources.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="p"> [in,out] If non-null, the Datablock* to push. </param>
///
/// <returns> true if it succeeds, false if it fails.
/// NOTE(review): the value returned by the no-op is not stated here; confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Push(Datablock* p);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate a datablock. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pAsyncContext"> [in] (optional) If non-null, the async context where the
/// block will be first used. </param>
/// <param name="bPooled"> [in] The pooling flag.
/// NOTE(review): presumably TRUE requests a pooled block --
/// confirm against the implementation. </param>
///
/// <returns> null if it fails, else the allocated datablock. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * 
AllocateBlock(
__in AsyncContext * pAsyncContext,
__in BOOL bPooled
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a new InitializerPort. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pTemplate"> [in] If non-null, the template. </param>
/// <param name="uiId"> [in] The identifier. </param>
/// <param name="lpszVariableBinding"> [in] If non-null, the variable binding. </param>
/// <param name="nParmIdx"> [in] Zero-based index of the parameter. </param>
/// <param name="nInOutRouteIdx"> [in] Zero-based index of the in/out route. </param>
///
/// <returns> null if it fails, else the new port. </returns>
///-------------------------------------------------------------------------------------------------
static Port * 
Create(
__in DatablockTemplate * pTemplate,
__in UINT uiId,
__in char * lpszVariableBinding,
__in int nParmIdx,
__in int nInOutRouteIdx
);
///-------------------------------------------------------------------------------------------------
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
/// for this port in a way that is consistent with a well-formed graph. Called by
/// CheckSemantics().
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="pos"> [in,out] The output stream (std::ostream). </param>
/// <param name="pGraph"> [in] non-null, the graph. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL 
CheckTypeSpecificSemantics(
__inout std::ostream * pos,
__in PTask::Graph * pGraph
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object has a block pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if it has a block pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasBlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
/// we have an upstream allocator (meta) port, the runtime will not create a block
/// pool for the corresponding output port. This turns out to put device-side
/// allocation on the critical path in some cases, so we provide a way to override
/// that behavior and allow a port to create a pool based on some size hints. When
/// there is a block available with sufficient space in the pool, the meta port can
/// avoid the allocation and draw from the pool.
/// </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///
/// <param name="nPoolSize"> Size of the block pool. </param>
/// <param name="nStride"> The stride. </param>
/// <param name="nDataBytes"> The data in bytes. </param>
/// <param name="nMetaBytes"> The meta in bytes. </param>
/// <param name="nTemplateBytes"> The template in bytes. </param>
/// <param name="bPageLockHostViews"> (optional) the page lock host views. Defaults to FALSE. </param>
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. Defaults to FALSE. </param>
///-------------------------------------------------------------------------------------------------
virtual void 
ForceBlockPoolHint(
__in UINT nPoolSize,
__in UINT nStride,
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in BOOL bPageLockHostViews=FALSE,
__in BOOL bEagerDeviceMaterialize=FALSE
);
///-------------------------------------------------------------------------------------------------
/// <summary> Adds a new block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block to add. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddNewBlock(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Returns a block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block being returned. </param>
///-------------------------------------------------------------------------------------------------
virtual void ReturnToPool(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the pool size. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The pool size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the "requests page locked" flag. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="bPageLocked"> The new value of the flag.
/// NOTE(review): presumably TRUE asks for page-locked host
/// memory for pooled blocks -- confirm against the
/// implementation. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetRequestsPageLocked(BOOL bPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the "requests page locked" flag. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The flag value.
/// NOTE(review): presumably the value last given to SetRequestsPageLocked rather
/// than a success/failure code -- confirm against the definition. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL GetRequestsPageLocked();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
///
/// Allocation of data-blocks and platform-specific buffers can be a significant
/// latency expense at dispatch time. We can actually preallocate output datablocks
/// and create device-side buffers at graph construction time. For each node in the
/// graph, allocate data blocks on any output ports, and create device-specific
/// buffers for all accelerators capable of executing the node.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL 
AllocateBlockPool(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
/// to null, and then release it. </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void
DestroyBlockPool(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles only the first pass.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL 
AllocateBlockPoolAsync(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles the second pass. It takes no parameters.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL 
FinalizeBlockPoolAsync(
VOID
);
};
};
#endif

Просмотреть файл

@ -1,493 +0,0 @@
//--------------------------------------------------------------------------------------
// File: InputPort.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _INPUT_PORT_H_
#define _INPUT_PORT_H_
#include "primitive_types.h"
#include "port.h"
namespace PTask {
class Channel;
class Datablock;
class DatablockTemplate;
class Accelerator;
///-------------------------------------------------------------------------------------------------
/// <summary> InputPort: a port subclass specialized to handle binding to input resources in
/// Task nodes.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class InputPort : public Port {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. Takes no arguments. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
InputPort();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Declared virtual. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~InputPort();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this port is occupied. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> true if occupied, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsOccupied();
///-------------------------------------------------------------------------------------------------
/// <summary>
/// Pulls a datablock from this port, potentially blocking until one becomes available.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> null if it fails, else the pulled datablock. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Pull();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the iteration source. </summary>
///
/// <remarks> Crossbac, 2/28/2012. </remarks>
///
/// <param name="pPort"> [in,out] If non-null, the port to use as the iteration source. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetIterationSource(Port * pPort);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the iteration source. </summary>
///
/// <remarks> Crossbac, 2/28/2012. </remarks>
///
/// <returns> null if it fails, else the iteration source port. </returns>
///-------------------------------------------------------------------------------------------------
virtual Port * GetIterationSource();
///-------------------------------------------------------------------------------------------------
/// <summary> Returns the datablock occupying this port without removing it. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> null if it fails, else the datablock currently occupying the port. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Peek();
///-------------------------------------------------------------------------------------------------
/// <summary> Pushes a datablock into this port. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pDatablockToPush"> [in,out] If non-null, the datablock to push. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Push(Datablock* pDatablockToPush);
///-------------------------------------------------------------------------------------------------
/// <summary> Binds a control channel to this port. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pChannelToBind"> [in,out] If non-null, the channel to bind. </param>
///-------------------------------------------------------------------------------------------------
virtual void BindControlChannel(Channel * pChannelToBind);
///-------------------------------------------------------------------------------------------------
/// <summary> Unbinds the control channel. Takes no arguments. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void UnbindControlChannel();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the destination buffer. Should be a no-op for InputPort. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator.
/// Defaults to NULL. </param>
///
/// <returns> null if it fails, else the destination buffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * GetDestinationBuffer(Accelerator * pAccelerator=NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the destination buffer. No-op for InputPort. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pDatablock"> [in,out] If non-null, the datablock. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetDestinationBuffer(Datablock * pDatablock);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the in/out consumer port. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pInOutConsumerPort"> [in,out] If non-null, the in/out consumer port. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetInOutConsumer(Port* pInOutConsumerPort);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a block to be the permanently sticky block for this port. Obviously, only
/// valid for certain kinds of ports (input varieties). Use for blocks that will have
/// only one value for the lifetime of the graph, to avoid creating and managing an
/// exposed channel or initializer channel that will only ever be used once. Do not
/// connect an upstream channel to ports that have been configured with a permanent
/// block.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="p"> If non-null, the Datablock* to use as the permanent block. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetPermanentBlock(Datablock * p);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the in/out consumer port. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> null if it fails, else the in/out consumer port. </returns>
///-------------------------------------------------------------------------------------------------
virtual Port* GetInOutConsumer();
///-------------------------------------------------------------------------------------------------
/// <summary> Releases the replayable block. Takes no arguments. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ReleaseReplayableBlock();
///-------------------------------------------------------------------------------------------------
/// <summary> Begins an iteration scope. </summary>
///
/// <remarks> Crossbac, 2/28/2012. </remarks>
///
/// <param name="uiIterations"> The iterations.
/// NOTE(review): presumably the number of iterations in the
/// scope being started -- confirm. </param>
///-------------------------------------------------------------------------------------------------
virtual void BeginIterationScope(UINT uiIterations);
///-------------------------------------------------------------------------------------------------
/// <summary> Completes a scoped iteration. Takes no parameters. </summary>
///
/// <remarks> Crossbac, 2/28/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void EndIterationScope();
///-------------------------------------------------------------------------------------------------
/// <summary> Static factory: creates a port object and returns it as a Port*.
/// NOTE(review): presumably the object created is an InputPort -- confirm
/// against the definition. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pDatablockTemplate"> [in,out] If non-null, the datablock template. </param>
/// <param name="uiUniqueIdentifier"> Unique identifier. </param>
/// <param name="lpszVariableBinding"> [in,out] If non-null, the variable binding. </param>
/// <param name="nParameterIndex"> Zero-based index of the parameter. </param>
/// <param name="nInOutRouteIdx"> Zero-based index of the in/out route. </param>
///
/// <returns> null if it fails, else the created port. </returns>
///-------------------------------------------------------------------------------------------------
static Port * Create(DatablockTemplate * pDatablockTemplate,
UINT uiUniqueIdentifier,
char * lpszVariableBinding,
int nParameterIndex,
int nInOutRouteIdx
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object has a block pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if it has a block pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasBlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
/// we have an upstream allocator (meta) port, the runtime will not create a block
/// pool for the corresponding output port. This turns out to put device-side
/// allocation on the critical path in some cases, so we provide a way to override
/// that behavior and allow a port to create a pool based on some size hints. When
/// there is a block available with sufficient space in the pool, the meta port can
/// avoid the allocation and draw from the pool.
/// </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///
/// <param name="nPoolSize"> Size of the block pool. </param>
/// <param name="nStride"> The stride. </param>
/// <param name="nDataBytes"> The data in bytes. </param>
/// <param name="nMetaBytes"> The meta in bytes. </param>
/// <param name="nTemplateBytes"> The template in bytes. </param>
/// <param name="bPageLockHostViews"> (optional) the page lock host views. Defaults to FALSE. </param>
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. Defaults to FALSE. </param>
///-------------------------------------------------------------------------------------------------
virtual void 
ForceBlockPoolHint(
__in UINT nPoolSize,
__in UINT nStride,
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in BOOL bPageLockHostViews=FALSE,
__in BOOL bEagerDeviceMaterialize=FALSE
);
///-------------------------------------------------------------------------------------------------
/// <summary> Adds a new block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block to add. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddNewBlock(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Returns a block to this port's block pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] The block being returned. Whether NULL is tolerated is
/// not visible in this header -- confirm before passing NULL. </param>
///-------------------------------------------------------------------------------------------------
virtual void ReturnToPool(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the size of this port's block pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The pool size (presumably a number of blocks -- confirm against the
/// implementation). </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the "requests page locked" flag for this port.
/// NOTE(review): judging by the name, this records whether the port asks for
/// page-locked (pinned) buffers; it does not itself lock or unlock any
/// memory page. Confirm against the implementation.
/// </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="bPageLocked"> The new value of the flag. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetRequestsPageLocked(BOOL bPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the "requests page locked" flag for this port. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The current value of the flag (presumably the value last passed to
/// SetRequestsPageLocked() -- confirm). This is a query result, not a
/// success/failure code. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL GetRequestsPageLocked();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
///
/// Allocation of data-blocks and platform-specific buffers can be a significant
/// latency expense at dispatch time. We can actually preallocate output datablocks
/// and create device-side buffers at graph construction time. For each node in the
/// graph, allocate data blocks on any output ports, and create device-specific
/// buffers for all accelerators capable of executing the node.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional, defaults to 0) Size of the pool. If
/// zero/defaulted, Runtime::GetICBlockPoolSize() will be used to
/// determine the size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPool(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
/// to null, and then release it. </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void
DestroyBlockPool(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether a block pool is active, i.e. able to deliver blocks and to
/// accept returned blocks. </summary>
///
/// <remarks> crossbac, 6/18/2013. </remarks>
///
/// <returns> true if a block pool is active, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
IsBlockPoolActive(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version, split into two passes: the first pass only
/// allocates device-space buffers, the second pass queues all the copies.
/// This function handles only the first pass; FinalizeBlockPoolAsync()
/// is documented as handling the second.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional, defaults to 0) Size of the pool. If
/// zero/defaulted, Runtime::GetICBlockPoolSize() will be used to
/// determine the size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPoolAsync(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version, split into two passes: the first pass only
/// allocates device-space buffers, the second pass queues all the copies.
/// This function handles the second pass; the first pass is
/// AllocateBlockPoolAsync().
/// </summary>
///
/// <remarks> crossbac, 6/15/2012.
/// This function takes no arguments. Earlier documentation listed
/// pAccelerators and uiPoolSize here; those belong to
/// AllocateBlockPoolAsync().
/// </remarks>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
FinalizeBlockPoolAsync(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Find the maximal capacity downstream port/channel path starting at this port.
/// </summary>
///
/// <remarks> crossbac, 1/3/2014. </remarks>
///
/// <param name="vTasksVisited"> [in,out] The set of tasks visited so far (presumably
/// used to avoid revisiting tasks during the search --
/// confirm against the implementation). </param>
/// <param name="vPath"> [in,out] The channels making up the path (presumably
/// filled in with the maximal-capacity path -- confirm). </param>
///
/// <returns> The found maximal downstream capacity. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
FindMaximalDownstreamCapacity(
__inout std::set<Task*>& vTasksVisited,
__inout std::vector<Channel*>& vPath
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this port is an explicit memory space transition point.
/// We return true only when we know for certain that this task
/// executes on one GPU and at least one downstream task definitely
/// needs a view of our outputs on another GPU. In general we can only
/// tell this with high precision when there is task affinity involved.
/// We use this to set the sharing hint on the access flags for allocated
/// blocks, which in turn allows some back ends to better optimize GPU-side
/// buffer allocation and data transfer.
/// </summary>
///
/// <remarks> Crossbac, 3/13/2014. </remarks>
///
/// <returns> true if explicit memory space transition point, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsExplicitMemorySpaceTransitionPoint();
///-------------------------------------------------------------------------------------------------
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
/// for this port in a way that is consistent with a well-formed graph. Called by
/// CheckSemantics()
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="pos"> [in,out] Output stream (presumably where diagnostics about
/// violations are written -- confirm against the implementation). </param>
/// <param name="pGraph"> [in,out] The graph; documented as non-null. </param>
///
/// <returns> true if the port's structures are consistent with a well-formed graph,
/// false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
PTask::Graph * pGraph);
protected:
/// <summary> The output port that is the consumer, if this port is part of an
/// in/out pair. Presumably NULL when the port is not part of such a
/// pair -- confirm against the implementation.
/// </summary>
Port * m_pInOutConsumer;
};
};
#endif

Просмотреть файл

@ -1,153 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: Lockable.h
//
// summary: Declares the lockable object class
///-------------------------------------------------------------------------------------------------
#ifndef __LOCKABLE_OBJECT_H__
#define __LOCKABLE_OBJECT_H__
#include <Windows.h>
#include <iostream>
#include <sstream>
#include <stdio.h>
#include <crtdbg.h>
#include "primitive_types.h"
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Lockable object. Super-class for all PTask runtime objects that implement coarse
/// object-level locking with CRITICAL_SECTION objects. Since CRITICAL_SECTIONs are
/// re-entrant, so are Lockables.
///
/// NOTE(review): no copy constructor or copy-assignment operator is declared, so
/// the compiler-generated member-wise versions remain available even though the
/// class holds a CRITICAL_SECTION and a raw char pointer. Confirm that instances
/// are never copied, or disable copying.
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class Lockable {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/27/2011.
/// NOTE(review): this single-argument constructor is not marked explicit,
/// so a char* converts implicitly to a Lockable.
/// </remarks>
///
/// <param name="lpszProtectedObjectName"> [in] If non-null, name of the protected object.
/// Whether the string is copied or merely referenced
/// is not visible in this header. </param>
///-------------------------------------------------------------------------------------------------
Lockable(char * lpszProtectedObjectName);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Virtual, since Lockable is used as a base class. </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~Lockable();
///-------------------------------------------------------------------------------------------------
/// <summary> Lock this object. </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <returns> the new lock depth. </returns>
///-------------------------------------------------------------------------------------------------
int Lock();
///-------------------------------------------------------------------------------------------------
/// <summary> Unlock this object. </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <returns> the new lock depth. </returns>
///-------------------------------------------------------------------------------------------------
int Unlock();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object is locked. This method is to be used in asserts that the
/// current thread holds the lock, and *not* to be used to implement TryLock
/// semantics!
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <returns> true if held, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL LockIsHeld();
///-------------------------------------------------------------------------------------------------
/// <summary> Return the lock depth. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> The lock depth. </returns>
///-------------------------------------------------------------------------------------------------
int GetLockDepth();
///-------------------------------------------------------------------------------------------------
/// <summary> In debug mode, enables/disables tracking for a particular object, returns
/// true if tracking is enabled after the call. When tracking is enabled,
/// all lock/unlock calls are logged to the console. A handy tool for teasing
/// apart deadlocks.
/// </summary>
///
/// <remarks> crossbac, 8/29/2013. </remarks>
///
/// <param name="bEnable"> (Optional, defaults to TRUE) TRUE to enable tracking, FALSE to
/// disable it. </param>
///
/// <returns> true if tracking is enabled after the call, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
BOOL TrackLockActivity(BOOL bEnable=TRUE);
private:
/// <summary> The lock </summary>
CRITICAL_SECTION m_lock;
/// <summary> Depth of the lock </summary>
int m_nLockDepth;
/// <summary> Name of the protected object. Ownership of the string is not
/// visible in this header. </summary>
char * m_lpszProtectedObjectName;
/// <summary> Identifier (not a handle) of the owning thread, if we are in debug mode. </summary>
DWORD m_dwOwningThreadId;
/// <summary> true if we should log lock/unlock activity for this object. </summary>
BOOL m_bTrack;
/// <summary> Count of unnested acquires (presumably acquisitions made while the
/// lock depth was zero -- confirm against the implementation). </summary>
UINT m_uiUnnestedAcquires;
/// <summary> Count of unnested releases (presumably releases that brought the
/// lock depth back to zero -- confirm against the implementation). </summary>
UINT m_uiUnnestedReleases;
///-------------------------------------------------------------------------------------------------
/// <summary> Updates the owning thread identifier. </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="bLocking"> true if this update is for the lock operation, otherwise this update
/// is for an unlock. </param>
///-------------------------------------------------------------------------------------------------
void UpdateOwningThreadId(BOOL bLocking);
///-------------------------------------------------------------------------------------------------
/// <summary> Logs lock activity. </summary>
///
/// <remarks> crossbac, 8/29/2013. </remarks>
///
/// <param name="bLocking"> true if the activity being logged is a lock operation, false if
/// it is an unlock. </param>
///-------------------------------------------------------------------------------------------------
void LogLockActivity(BOOL bLocking);
};
};
#endif // __LOCKABLE_OBJECT_H__

Просмотреть файл

@ -1,448 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: MemorySpace.h
//
// summary: Simple class describing a memory space
///-------------------------------------------------------------------------------------------------
#ifndef __MEMORY_SPACE_H__
#define __MEMORY_SPACE_H__
#include "primitive_types.h"
#include "Lockable.h"
#include <map>
#include <set>
#include <iostream>
#include <sstream>
namespace PTask {
/// <summary> Identifier of the host memory space. </summary>
static const UINT HOST_MEMORY_SPACE_ID = 0;
/// <summary> Maximum number of memory spaces; used as the size of the static
/// memory space map in MemorySpace. </summary>
static const UINT MAX_MEMORY_SPACES = 12;
/// <summary> Sentinel identifier denoting an unknown memory space. </summary>
static const UINT UNKNOWN_MEMORY_SPACE_ID = 0xFFFFFFFF;
class Accelerator;
/// <summary> Signature of a static allocator function for a memory space. The two ULONG
/// arguments presumably carry the byte count and allocation flags, mirroring
/// MemorySpace::AllocateMemoryExtent -- confirm against the implementation.
/// </summary>
typedef void * (__stdcall *LPFNSTATICALLOCATOR)(ULONG, ULONG);
/// <summary> Signature of a static de-allocator function for a memory space; takes the
/// pointer to release. </summary>
typedef void (__stdcall *LPFNSTATICDEALLOCATOR)(void*);
///-------------------------------------------------------------------------------------------------
/// <summary> Memory status for a memory type on a device.
/// Currently we track global and page-locked memory.
/// Could easily expand to track other types.
/// Both structs are only forward-declared here; their definitions are
/// not visible in this header. </summary>
///
/// <remarks> Crossbac, 3/15/2013. </remarks>
///-------------------------------------------------------------------------------------------------
struct DeviceMemoryStatus_t;
struct GlobalDeviceMemoryState_t;
///-------------------------------------------------------------------------------------------------
/// <summary> Memory space object. Encapsulates data about what accelerators are associated
/// with the space, whether there are specialized allocators for managing buffers
/// created in other spaces that must communicate witht this one, whether we need an
/// accelerator object to perform allocations in this space (or any static allocators
/// otherwise).
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class MemorySpace : public Lockable {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the number of memory spaces active in the system. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> The number of memory spaces. </returns>
///-------------------------------------------------------------------------------------------------
static UINT GetNumberOfMemorySpaces();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the accelerator from memory space identifier. </summary>
///
/// <remarks> Crossbac, 12/30/2011. </remarks>
///
/// <param name="id"> The identifier. </param>
///
/// <returns> null if it fails, else the accelerator from memory space identifier. </returns>
///-------------------------------------------------------------------------------------------------
static Accelerator * GetAcceleratorFromMemorySpaceId(UINT id);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the accelerator from memory space identifier. </summary>
///
/// <remarks> Crossbac, 12/30/2011. </remarks>
///
/// <param name="id"> The identifier. </param>
///
/// <returns> null if it fails, else the accelerator from memory space identifier. </returns>
///-------------------------------------------------------------------------------------------------
static MemorySpace * GetMemorySpaceFromId(UINT id);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the percentage of this space already allocated. </summary>
///
/// <remarks> crossbac, 9/10/2013. </remarks>
///
/// <returns> The allocated percent. </returns>
///-------------------------------------------------------------------------------------------------
static UINT GetAllocatedPercent(UINT uiMemorySpaceId);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets allocation percentages. </summary>
///
/// <remarks> crossbac, 9/10/2013. </remarks>
///
/// <param name="vDeviceMemories"> [in,out] The device memories. </param>
///-------------------------------------------------------------------------------------------------
static void GetAllocationPercentages(std::map<UINT, UINT>& vDeviceMemories);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the accelerator from memory space identifier. </summary>
///
/// <remarks> Crossbac, 12/30/2011. </remarks>
///
/// <param name="id"> The identifier. </param>
///
/// <returns> null if it fails, else the accelerator from memory space identifier. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL HasStaticAllocator(UINT id);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate an extent in this memory space. Fails if
/// no static allocator is present. </summary>
///
/// <remarks> Crossbac, 12/30/2011. </remarks>
///
/// <param name="uiMemorySpace"> The identifier. </param>
/// <param name="ulBytesToAllocate"> The ul bytes to allocate. </param>
/// <param name="ulFlags"> The ul flags. </param>
///
/// <returns> null if it fails, else the accelerator from memory space identifier. </returns>
///-------------------------------------------------------------------------------------------------
static void * AllocateMemoryExtent(UINT uiMemorySpace, ULONG ulBytesToAllocate, ULONG ulFlags);
///-------------------------------------------------------------------------------------------------
/// <summary> Deallocate an extent in this memory space. Fails if no static allocator is present.
/// </summary>
///
/// <remarks> Crossbac, 12/30/2011. </remarks>
///
/// <param name="uiMemorySpace"> The identifier. </param>
/// <param name="pMemoryExtent"> [in,out] The ul bytes to allocate. </param>
///-------------------------------------------------------------------------------------------------
static void DeallocateMemoryExtent(UINT uiMemorySpace, void * pMemoryExtent);
///-------------------------------------------------------------------------------------------------
/// <summary> Registers the memory space. </summary>
///
/// <remarks> Crossbac, 12/30/2011. </remarks>
///
/// <param name="pSpace"> [in,out] memory space. </param>
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
///-------------------------------------------------------------------------------------------------
static void RegisterMemorySpace(MemorySpace * pSpace, Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Associate the accelerator with the memory space. </summary>
///
/// <remarks> Crossbac, 12/30/2011. </remarks>
///
/// <param name="pSpace"> [in,out] memory space. </param>
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
///-------------------------------------------------------------------------------------------------
static void RegisterMemorySpaceId(UINT id, Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the memory space map. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///-------------------------------------------------------------------------------------------------
static void InitializeMemorySpaces();
///-------------------------------------------------------------------------------------------------
/// <summary> Unregisters the memory spaces at tear-down time. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///-------------------------------------------------------------------------------------------------
static void UnregisterMemorySpaces();
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="lpszProtectedObjectName"> [in] If non-null, name of the protected object. </param>
///-------------------------------------------------------------------------------------------------
MemorySpace(std::string& szDeviceName, UINT nMemorySpaceId);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~MemorySpace();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this memory space has a static buffer allocator function. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <returns> true if static allocator, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL HasStaticAllocator();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate a memory extent in this memory space of the given
/// size. If this memory space does not have a static allocator,
/// return NULL. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <param name="ulNumberOfBytes"> The ul number of in bytes. </param>
/// <param name="ulFlags"> The ul flags. </param>
///
/// <returns> null if it fails, else. </returns>
///-------------------------------------------------------------------------------------------------
void * AllocateMemoryExtent(ULONG ulNumberOfBytes, ULONG ulFlags);
///-------------------------------------------------------------------------------------------------
/// <summary> Deallocate memory extent. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <param name="pvMemoryExtent"> [in,out] If non-null, extent of the pv memory. </param>
///-------------------------------------------------------------------------------------------------
void DeallocateMemoryExtent(void* pvMemoryExtent);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the percentage of this space already allocated. </summary>
///
/// <remarks> crossbac, 9/10/2013. </remarks>
///
/// <returns> The allocated percent. </returns>
///-------------------------------------------------------------------------------------------------
UINT __GetAllocatedPercent();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a pointer to any accelerator mapped to this space. Most spaces
/// have just one, so this simplifies the process of getting an object
/// that can provide allocation services if no static allocator is present.
/// </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <returns> null if it fails, else any accelerator. </returns>
///-------------------------------------------------------------------------------------------------
Accelerator * GetAnyAccelerator();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the number of accelerators mapped to this space. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <returns> The number of accelerators. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetNumberOfAccelerators();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets all accelerators in this space, by putting them in the user-provided buffer.
/// At most nMaxAccelerators will be provided.
/// </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <param name="ppAccelerators"> [in,out] If non-null, the accelerators. </param>
/// <param name="nMaxAccelerators"> The maximum accelerators. </param>
///
/// <returns> The number of accelerators in the result buffer, which may be different from
/// nMaxAccelerators!
/// </returns>
///-------------------------------------------------------------------------------------------------
UINT GetAccelerators(Accelerator ** ppAccelerators, UINT nMaxAccelerators);
///-------------------------------------------------------------------------------------------------
/// <summary> Assign a unique memory space identifier. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> . </returns>
///-------------------------------------------------------------------------------------------------
static UINT AssignUniqueMemorySpaceIdentifier();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a static allocator function for this memory space. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <param name="lpfnStaticAllocatorFunction"> The lpfn static allocator function. </param>
/// <param name="lpfnStaticDeallocatorFunction"> The lpfn static deallocator function. </param>
///-------------------------------------------------------------------------------------------------
void SetStaticAllocator(LPFNSTATICALLOCATOR lpfnStaticAllocatorFunction,
LPFNSTATICDEALLOCATOR lpfnStaticDeallocatorFunction
);
///-------------------------------------------------------------------------------------------------
/// <summary> Adds a deferred allocation entry for the proxy accelerator, indicating that
/// allocations for this space should be deferred to accelerators for that space,
/// when the resulting buffers will be used to commnunicate between those spaces.
/// </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <param name="pProxyAllocatorAccelerator"> [in,out] If non-null, the proxy allocator
/// accelerator. </param>
///-------------------------------------------------------------------------------------------------
void AddDeferredAllocationEntry(Accelerator* pProxyAllocatorAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Adds an accelerator to this memory space. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
///-------------------------------------------------------------------------------------------------
void AddAccelerator(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Updates the space size bytes described by uiBytes. </summary>
///
/// <remarks> Crossbac, 3/15/2013. </remarks>
///
/// <param name="uiBytes"> The bytes. </param>
///-------------------------------------------------------------------------------------------------
void UpdateSpaceSizeBytes(unsigned __int64 uiBytes);
///-------------------------------------------------------------------------------------------------
/// <summary> Resets the memory state. </summary>
///
/// <remarks> Crossbac, 3/15/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void Reset();
///-------------------------------------------------------------------------------------------------
/// <summary> Record a memory allocation. </summary>
///
/// <remarks> Crossbac, 3/15/2013. </remarks>
///
/// <param name="pMemoryExtent"> [in,out] If non-null, extent of the memory. </param>
/// <param name="uiBytes"> The bytes. </param>
///-------------------------------------------------------------------------------------------------
void
RecordAllocation(
__in void * pMemoryExtent,
__in size_t uiBytes,
__in BOOL bPinned
);
///-------------------------------------------------------------------------------------------------
/// <summary> Record a memory deallocation. </summary>
///
/// <remarks> Crossbac, 3/15/2013. </remarks>
///
/// <param name="pMemoryExtent"> [in,out] If non-null, extent of the memory. </param>
/// <param name="bPinnedAllocation"> true to pinned allocation. </param>
/// <param name="uiBytes"> The bytes. </param>
///-------------------------------------------------------------------------------------------------
void
RecordDeallocation(
__in void * pMemoryExtent
);
///-------------------------------------------------------------------------------------------------
/// <summary> Dumps the allocation statistics. </summary>
///
/// <remarks> Crossbac, 3/15/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void Report(
std::ostream &ios
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets memory state. </summary>
///
/// <remarks> Crossbac, 3/15/2013. </remarks>
///
/// <returns> null if it fails, else the memory state. </returns>
///-------------------------------------------------------------------------------------------------
struct GlobalDeviceMemoryState_t * GetMemoryState();
private:
/// <summary> Name of the device. </summary>
std::string m_strDeviceName;
/// <summary> Identifier for the memory space </summary>
UINT m_nMemorySpaceId;
/// <summary> Pointer to a static allocator function, if
/// one exists for this memory space.
/// </summary>
LPFNSTATICALLOCATOR m_lpfnStaticAllocator;
/// <summary> Pointer to a static de-allocator function, if
/// one exists for this memory space.
/// </summary>
LPFNSTATICDEALLOCATOR m_lpfnStaticDeallocator;
/// <summary> The set of deferred allocator spaces. Each entry in this
/// set indicates that memory allocations in this space
/// should be deferred to allocators provided by
/// accelerators mapped to the space identified by the
/// entry. For example, if this memory space describes
/// the host memory space, it will contain an entry for
/// every CUDA memory space because we should be using
/// CUDA APIs to allocate host memory for best performance.
/// </summary>
std::set<UINT> m_pDeferredAllocatorSpaces;
/// <summary> The accelerators mapped to this space. </summary>
std::set<Accelerator*> m_pAccelerators;
/// <summary> State of the memory. </summary>
struct GlobalDeviceMemoryState_t * m_pMemoryState;
/// <summary> Counter for assigning unique identifiers
/// to Memory spaces objects.
/// </summary>
static UINT m_uiMemorySpaceIdCounter;
/// <summary> static MemorySpace map </summary>
static MemorySpace* m_vMemorySpaceMap[MAX_MEMORY_SPACES];
};
};
#endif // __MEMORY_SPACE_H__

Просмотреть файл

@ -1,686 +0,0 @@
//--------------------------------------------------------------------------------------
// File: MetaPort.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _META_PORT_H_
#define _META_PORT_H_
#include "primitive_types.h"
#include "port.h"
namespace PTask {
class Channel;
class Datablock;
class DatablockTemplate;
class Accelerator;
///-------------------------------------------------------------------------------------------------
/// <summary> Meta port. A meta port is a port that consumes datablocks but does not bind them
/// to Task inputs. Rather, the runtime uses the contained information to perform
/// operations on behalf of the Task for which the MetaPort is an input. Currently,
/// the only operation of this class is allocation of Datablocks on OutputPorts,
/// although the mechanism will be generalized in the future. A MetaPort consumes a
/// datablock, expecting it to contain a single integer value, which is the
/// interpreted as the allocation size for the OutputPort specified in the
/// m_pAllocatorPort member.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class MetaPort : public Port {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
MetaPort();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~MetaPort();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object is occupied. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> true if occupied, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsOccupied();
///-------------------------------------------------------------------------------------------------
/// <summary> Pulls the next datablock. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> null if it fails, else the next datablock. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Pull();
///-------------------------------------------------------------------------------------------------
/// <summary> Peek at the next datablock on this port. Peek on an InitializerPort always
/// returns NULL, because datablocks are created on demand in response to a pull.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011.
/// NOTE(review): the summary talks about InitializerPort although this is
/// MetaPort::Peek; the text looks carried over from another port class.
/// Confirm against the implementation whether it describes MetaPort.
/// </remarks>
///
/// <returns> null if it fails, else the current top-of-stack object. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Peek();
///-------------------------------------------------------------------------------------------------
/// <summary> Pushes an object into this port. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pDatablock"> [in,out] If non-null, the Datablock* to push. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Push(Datablock* pDatablock);
///-------------------------------------------------------------------------------------------------
/// <summary> Bind control channel. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
///-------------------------------------------------------------------------------------------------
virtual void BindControlChannel(Channel * pChannel);
///-------------------------------------------------------------------------------------------------
/// <summary> Unbind control channel. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void UnbindControlChannel();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the allocation port. This port must be an output port, and is the port on
/// which a new datablock will be allocated when a block is consumed from this
/// MetaPort.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pPort"> [in,out] If non-null, the port. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetAllocationPort(Port * pPort);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the allocation port. This port must be an output port, and is the port on
/// which a new datablock will be allocated when a block is consumed from this
/// MetaPort.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> null if it fails, else the allocation port. </returns>
///-------------------------------------------------------------------------------------------------
virtual Port * GetAllocationPort();
///-------------------------------------------------------------------------------------------------
/// <summary> Adds an iteration target to the list. </summary>
///
/// <remarks> Crossbac, 2/28/2012. </remarks>
///
/// <param name="pPort"> [in,out] If non-null, the port. </param>
///-------------------------------------------------------------------------------------------------
virtual void BindIterationTarget(Port * pPort);
///-------------------------------------------------------------------------------------------------
/// <summary> Configure iteration targets. </summary>
///
/// <remarks> Crossbac, 2/28/2012. </remarks>
///
/// <param name="pBlock"> The datablock used to configure the targets (presumably the
/// block consumed from this port -- TODO confirm against the
/// implementation). </param>
///-------------------------------------------------------------------------------------------------
virtual void ConfigureIterationTargets(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets an integer value from a block consumed from this MetaPort. Should not be
/// called when the port is unoccupied because it will block on a Pull call. On exit,
/// bControlBlock is TRUE if the consumed block carried a control signal;
/// luiControlSignal will be set accordingly if this is the case. The integer value
/// can be used by iteration control or output allocation meta functions.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="bControlBlock"> [out] True on exit if the block pulled to compute the
/// allocation size carried a control signal. </param>
/// <param name="luiControlSignal"> [out] If the block pulled to compute the allocation size
/// carried a control signal, the control code from that block. </param>
///
/// <returns> The integer value at offset 0 in the datablock's data channel. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
GetIntegerValue(
BOOL &bControlBlock,
CONTROLSIGNAL &luiControlSignal
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a new MetaPort. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pDatablockTemplate"> [in] If non-null, the datablock template. </param>
/// <param name="uiUniqueIdentifier"> Unique identifier (caller-supplied, uniqueness not
/// enforced). </param>
/// <param name="lpszVariableBinding"> [in] If non-null, the variable binding. </param>
/// <param name="nBoundParameterIndex"> Zero-based index of the bound parameter. </param>
/// <param name="nInOutRouteIdx"> Zero-based index of the in/out route. </param>
///
/// <returns> null if it fails, else the newly created port. </returns>
///-------------------------------------------------------------------------------------------------
static Port *
Create(
__in DatablockTemplate * pDatablockTemplate,
__in UINT uiUniqueIdentifier,
__in char * lpszVariableBinding,
__in int nBoundParameterIndex,
__in int nInOutRouteIdx
);
///-------------------------------------------------------------------------------------------------
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
/// for this port in a way that is consistent with a well-formed graph. Called by
/// CheckSemantics()
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="pos"> [in,out] output string stream. </param>
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
PTask::Graph * pGraph);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a destination buffer occupying this output port. Meaningless for MetaPorts,
/// but required by the abstract superclass Port.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="pAccelerator"> (optional) [in] If non-null, an accelerator object to assist
/// creating a datablock if none is available. </param>
///
/// <returns> null if it fails, else the destination buffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * GetDestinationBuffer(Accelerator * pAccelerator=NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a destination buffer. Meaningless for MetaPorts, but required by the
/// abstract superclass Port.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="p"> If non-null, the datablock to set as the destination buffer. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetDestinationBuffer(Datablock * p);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a block to be the permanently sticky block for this port. Obviously, only
/// valid for certain kinds of ports (input varieties). Use for blocks that will have
/// only one value for the lifetime of the graph, to avoid creating and managing an
/// exposed channel or initializer channel that will only ever be used once. Do not
/// connect an upstream channel to ports that have been configured with a permanent
/// block.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="p"> If non-null, the datablock to make permanent on this port. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetPermanentBlock(Datablock * p);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a meta function. </summary>
///
/// <remarks> Crossbac, 1/10/2012. </remarks>
///
/// <param name="eMetaFunctionSpecifier"> Information describing the meta function. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetMetaFunction(METAFUNCTION eMetaFunctionSpecifier);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the meta function. </summary>
///
/// <remarks> Crossbac, 1/10/2012. </remarks>
///
/// <returns> The meta function. </returns>
///-------------------------------------------------------------------------------------------------
virtual METAFUNCTION GetMetaFunction();
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the work associated with this port's meta function. For example, if the
/// port is an allocator, allocate a block for the downstream output port. If it is
/// an iterator, set the iteration count on the Task.
/// </summary>
///
/// <remarks> Crossbac, 1/10/2012. </remarks>
///
/// <param name="pDispatchAccelerator"> [in] The dispatch accelerator (annotated __in in the
/// signature). </param>
///-------------------------------------------------------------------------------------------------
virtual void
PerformMetaFunction(
__in Accelerator * pDispatchAccelerator
);
///-------------------------------------------------------------------------------------------------
/// <summary> Perform any post-dispatch work associated with this port's meta function. For
/// example, if the port is an iteration construct, reset the loop bounds and
/// propagate any control signals associated with the iteration.
/// </summary>
///
/// <remarks> Crossbac, 1/10/2012. </remarks>
///
/// <param name="pDispatchAccelerator"> [in,out] If non-null, the dispatch accelerator. </param>
///-------------------------------------------------------------------------------------------------
virtual void FinalizeMetaFunction(Accelerator * pDispatchAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object has block pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if block pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasBlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
/// we have an upstream allocator (meta) port, the runtime will not create a block
/// pool for the corresponding output port. This turns out to put device-side
/// allocation on the critical path in some cases, so we provide a way to override
/// that behavior and allow a port to create a pool based on some size hints. When
/// there is a block available with sufficient space in the pool, the meta port can
/// avoid the allocation and draw from the pool.
/// </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///
/// <param name="nPoolSize"> Size of the block pool. </param>
/// <param name="nStride"> The stride. </param>
/// <param name="nDataBytes"> The data in bytes. </param>
/// <param name="nMetaBytes"> The meta in bytes. </param>
/// <param name="nTemplateBytes"> The template in bytes. </param>
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
///-------------------------------------------------------------------------------------------------
virtual void
ForceBlockPoolHint(
__in UINT nPoolSize,
__in UINT nStride,
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in BOOL bPageLockHostViews=FALSE,
__in BOOL bEagerDeviceMaterialize=FALSE
);
///-------------------------------------------------------------------------------------------------
/// <summary> add a new block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddNewBlock(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> return a block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void ReturnToPool(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets pool size. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The pool size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets request page locked. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetRequestsPageLocked(BOOL bPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets request page locked. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The request-page-locked setting (presumably the value last passed to
/// SetRequestsPageLocked rather than a success/failure code -- confirm against
/// the implementation). </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL GetRequestsPageLocked();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
///
/// Allocation of data-blocks and platform-specific buffers can be a significant
/// latency expense at dispatch time. We can actually preallocate output datablocks
/// and create device-side buffers at graph construction time. For each node in the
/// graph, allocate data blocks on any output ports, and create device-specific
/// buffers for all accelerators capable of executing the node.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPool(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
/// to null, and then release it. </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void
DestroyBlockPool(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
///
/// <remarks> crossbac, 6/18/2013. </remarks>
///
/// <returns> true if a block pool is active, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
IsBlockPoolActive(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles only the first pass.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPoolAsync(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles the second pass. It takes no parameters.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
FinalizeBlockPoolAsync(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Searches for collaborating meta ports: if this port is an allocator
/// for output ports with descriptor ports, block allocation may have
/// dependences on other meta ports for the bound task. We need to know this
/// at dispatch time, but it is a static property of the graph, so
/// we pre-compute it as a side-effect of OnGraphComplete().
/// </summary>
///
/// <remarks> Crossbac, 2/15/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void FindCollaboratingMetaPorts();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets an allocation hint. </summary>
///
/// <remarks> crossbac, 8/21/2013. </remarks>
///
/// <param name="uiAllocationHint"> The allocation size hint. </param>
/// <param name="bForceAllocHint"> Whether to force the allocation hint (presumably stored
/// in m_bForceAllocHint, i.e. the hint then takes precedence
/// over the value received on the incoming channel --
/// confirm against the implementation). </param>
///-------------------------------------------------------------------------------------------------
void
SetAllocationHint(
__in UINT uiAllocationHint,
__in BOOL bForceAllocHint
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this port has been configured with a statically known allocation size.
/// </summary>
///
/// <remarks> crossbac, 8/21/2013. </remarks>
///
/// <returns> true if static allocation size, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL IsStaticAllocationSize();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the channel allocation size when this meta port is an allocator for an
/// output port with descriptor ports (meaning another meta port is responsible for
/// computing that allocation size). If this meta port is not involved in such a
/// graph structure, return 0.
/// </summary>
///
/// <remarks> Crossbac, 2/15/2013. </remarks>
///
/// <param name="pDispatchAccelerator"> [in] The dispatch accelerator (annotated __in in the
/// signature). </param>
/// <param name="eFunc"> [in] The descriptor function. </param>
/// <param name="ppPortTemplate"> [out] on exit the template for the related collaborative
/// port, if one is available. These are needed when initial
/// values are supplied by the template. </param>
///
/// <returns> The meta buffer allocation size. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetCollaborativeAllocationSize(
__in Accelerator * pDispatchAccelerator,
__in DESCRIPTORFUNC eFunc,
__out DatablockTemplate ** ppPortTemplate
);
///-------------------------------------------------------------------------------------------------
/// <summary> Finalize collaborative allocations. If this port has completed a collaborative
/// allocation (where other meta ports determine meta/template channel sizes)
/// we need to finish the binding of an output block at those ports. </summary>
///
/// <remarks> Crossbac, 2/15/2013. </remarks>
///
/// <param name="pDispatchAccelerator"> [in] non-null, the dispatch accelerator. </param>
/// <param name="pBlock"> [in,out] non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
void FinalizeCollaborativeAllocations(
__in Accelerator * pDispatchAccelerator,
__inout Datablock * pBlock
);
///-------------------------------------------------------------------------------------------------
/// <summary> Perform allocation. In this case, a datablock on a metaport provides an integer-
/// valued allocation size for another output port on the ptask. Hence, this function
/// looks at all metaports, and performs output datablock allocation as needed.
/// </summary>
///
/// <remarks> Crossbac, 1/10/2012. </remarks>
///
/// <param name="pDispatchAccelerator"> [in,out] If non-null, the dispatch accelerator. </param>
///-------------------------------------------------------------------------------------------------
void
PerformAllocation(
__in Accelerator * pDispatchAccelerator
);
///-------------------------------------------------------------------------------------------------
/// <summary> Configure simple iteration. Simple iteration is distinguished from general
/// iteration because it involves iterative invocation of a single PTask node. The
/// mechanisms required to build this are so much simpler than those required to
/// build general iteration over arbitrary subgraphs that it is worth bothering to
/// distinguish the case. Here, the datablock received on this port contains an
/// integer-valued iteration count, which we set on the task directly. Task::Dispatch
/// is responsible for clearing the iteration count after dispatch.
/// This function takes no parameters.
/// </summary>
///
/// <remarks> Crossbac, 1/10/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void ConfigureSimpleIteration();
///-------------------------------------------------------------------------------------------------
/// <summary> Configure general iteration. </summary>
///
/// <remarks> Crossbac, 1/10/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void ConfigureGeneralIteration();
///-------------------------------------------------------------------------------------------------
/// <summary> Finalize general iteration. (Update iteration state after task dispatch,
/// and propagate control signals where appropriate). </summary>
///
/// <remarks> Crossbac, 1/10/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void FinalizeGeneralIteration();
/// <summary> The allocation port. This port must be an output port, and is the port on
/// which a new datablock will be allocated when a block is consumed from this
/// MetaPort.
/// </summary>
Port * m_pAllocationPort;
/// <summary> The meta function </summary>
METAFUNCTION m_eMetaFunction;
/// <summary> The general iteration block </summary>
Datablock * m_pGeneralIterationBlock;
/// <summary> Number of general iterations </summary>
UINT m_nGeneralIterationCount;
/// <summary> The general iteration maximum </summary>
UINT m_nGeneralIterationMax;
/// <summary> if this object is collaborative allocator and another meta port is responsible
/// for computing the allocation size of the metadata buffer channel on the block
/// allocated by *this* meta-port, we keep a pointer to that other port. Since
/// deciding requires traversing part of the graph structure, we set this once so we
/// don't have to do it again.
/// </summary>
Port * m_pCollaborativeMetaAllocator;
/// <summary> if this object is collaborative allocator and another meta port is responsible
/// for computing the allocation size of the template buffer channel on the block
/// allocated by *this* meta-port, we keep a pointer to that other port. Since
/// deciding requires traversing part of the graph structure, we set this once so we
/// don't have to do it again.
/// </summary>
Port * m_pCollaborativeTemplateAllocator;
/// <summary> An allocation size hint. </summary>
UINT m_uiAllocHint;
/// <summary> true if the allocation hint takes precedence over the value
/// received on the incoming channel for this port. </summary>
BOOL m_bForceAllocHint;
};
};
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,96 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: PBufferProfiler.h
//
// summary: Declares the buffer profiler class
///-------------------------------------------------------------------------------------------------
#ifndef _PBUFFER_PROFILER_H_
#define _PBUFFER_PROFILER_H_
#include "ptaskutils.h"
#include "primitive_types.h"
#include <deque>
#include <set>
#include <map>
#include "hrperft.h"
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Buffer profiler. Class encapsulating profiling/statistics tools for PBuffers.
/// Note that every member of this class, including the data members at the
/// end of the declaration, is public.
/// </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
class PBufferProfiler {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
PBufferProfiler();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Declared virtual, so deleting through a base-typed
/// pointer of a derived profiler is well-defined. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~PBufferProfiler();
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the allocation profiler. </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void Initialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Deinitializes the allocation profiler. </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void Deinitialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Dumps the allocation profiler data. </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///
/// <param name="ios"> [in,out] The output stream the profiler data is dumped to. </param>
///-------------------------------------------------------------------------------------------------
void Report(std::ostream &ios);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the data for one allocation. </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///
/// <param name="uiAllocBytes"> The allocation size in bytes. </param>
/// <param name="uiAccID"> NOTE(review): presumably the identifier of the accelerator
/// the allocation was made on (the generated text read
/// "Identifier for the accumulate") -- confirm against the
/// implementation. </param>
/// <param name="dLatency"> The latency. NOTE(review): units are not visible from
/// this header -- confirm. </param>
///-------------------------------------------------------------------------------------------------
void Record(UINT uiAllocBytes, UINT uiAccID, double dLatency);
/// <summary> Recorded allocation sizes. NOTE(review): presumably keyed by an
/// allocation index, with the size in bytes as the value -- confirm
/// against the implementation. </summary>
std::map<UINT, UINT> m_vAllocationSizes;
/// <summary> Recorded allocation devices. NOTE(review): presumably keyed like
/// m_vAllocationSizes, with the uiAccID passed to Record as the
/// value -- confirm. </summary>
std::map<UINT, UINT> m_vAllocationDevices;
/// <summary> Recorded allocation latencies. NOTE(review): presumably keyed like
/// m_vAllocationSizes, with the dLatency passed to Record as the
/// value -- confirm. </summary>
std::map<UINT, double> m_vAllocationLatencies;
/// <summary> Allocation counter. NOTE(review): presumably the number of
/// allocations recorded so far -- confirm. </summary>
UINT m_nAllocations;
/// <summary> Timer object (raw pointer). NOTE(review): ownership and lifetime
/// are not visible from this header; presumably managed by
/// Initialize/Deinitialize -- confirm. </summary>
CHighResolutionTimer * m_pAllocationTimer;
/// <summary> Pointer to a critical section. NOTE(review): presumably guards
/// the profiler's data members -- confirm. </summary>
LPCRITICAL_SECTION m_pcsAllocProfiler;
/// <summary> Initialization flag. Note that it is declared UINT even though the
/// 'b' prefix suggests a boolean. </summary>
UINT m_bAllocProfilerInit;
};
};
#endif

Просмотреть файл

@ -1,252 +0,0 @@
//--------------------------------------------------------------------------------------
// File: pclbuffer.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _PCLBUFFER_H_
#define _PCLBUFFER_H_
#ifdef OPENCL_SUPPORT
#include <stdio.h>
#include <crtdbg.h>
#include "pbuffer.h"
#include "ptaskutils.h"
#include <map>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific buffer class for the OpenCL runtime. Derives
/// publicly from PBuffer. Everything declared after the 'protected:'
/// label below, including m_bScalarBinding, is protected.
/// </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
class PCLBuffer :
public PBuffer
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <param name="pParent"> [in,out] If non-null, the parent. </param>
/// <param name="bufferAccessFlags"> The buffer access flags. </param>
/// <param name="nChannelIndex"> Zero-based index of the channel. </param>
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator.
/// Defaults to NULL. </param>
/// <param name="pAllocatorAccelerator"> (optional) [in,out] If non-null, the allocator
/// accelerator. Defaults to NULL. </param>
/// <param name="uiUID"> (optional) the uid. Defaults to the value
/// returned by ptaskutils::nextuid(). </param>
///-------------------------------------------------------------------------------------------------
PCLBuffer(Datablock * pParent,
BUFFERACCESSFLAGS bufferAccessFlags,
UINT nChannelIndex,
Accelerator * pAccelerator=NULL,
Accelerator * pAllocatorAccelerator=NULL,
UINT uiUID=ptaskutils::nextuid()
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~PCLBuffer(void);
///-------------------------------------------------------------------------------------------------
/// <summary> Force synchronize. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ForceSynchronize();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize host view. </summary>
///
/// <remarks> crossbac, 1/11/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="pBuffer"> [in] The host memory extent. NOTE(review): presumably
/// the host-side destination of the data -- confirm. </param>
/// <param name="bForceSynchronous"> [in] Force-synchronous flag. NOTE(review): presumably
/// TRUE makes the operation synchronous -- confirm. </param>
/// <param name="bRequestOutstanding"> [out] The request-outstanding flag. NOTE(review):
/// presumably TRUE when an asynchronous request is still
/// in flight on return -- confirm. </param>
///
/// <returns> NOTE(review): the meaning of the UINT result is not documented -- TODO. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateHostView(
__in AsyncContext * pAsyncContext,
__in HOSTMEMORYEXTENT * pBuffer,
__in BOOL bForceSynchronous,
__out BOOL &bRequestOutstanding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize mutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
/// <param name="bOutstanding"> [out] The outstanding flag. NOTE(review): presumably
/// TRUE when an asynchronous request is still in flight
/// on return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. </param>
/// <param name="lpszBinding"> [in] The binding. </param>
///
/// <returns> NOTE(review): the meaning of the UINT result is not documented -- TODO. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateMutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize immutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
/// <param name="bOutstanding"> [out] The outstanding flag. NOTE(review): presumably
/// TRUE when an asynchronous request is still in flight
/// on return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. </param>
/// <param name="lpszBinding"> [in] The binding. </param>
///
/// <returns> NOTE(review): the meaning of the UINT result is not documented -- TODO. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateImmutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
/// resources (not in constant memory).
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer
/// contents. </param>
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
/// buffer which will be used to label runtime-specific
/// objects to aid in debugging. Ignored on release
/// builds. Defaults to NULL. </param>
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
/// addressable. Defaults to true. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateMutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
/// resources (i.e. those in constant memory).
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer
/// contents. </param>
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
/// buffer which will be used to label runtime-specific
/// objects to aid in debugging. Ignored on release
/// builds. Defaults to NULL. </param>
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
/// addressable. Defaults to true. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateImmutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates readable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> (optional) [in] If non-null, a string used to label the object
/// that can be used for debugging. Defaults to NULL. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably to be checked with the
/// PTSUCCESS/PTFAILED macros like the Create*Buffer methods -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates writable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> (optional) [in] If non-null, a string used to label the object
/// that can be used for debugging. Defaults to NULL. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably to be checked with the
/// PTSUCCESS/PTFAILED macros like the Create*Buffer methods -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> (optional) [in] If non-null, a string used to label the object
/// that can be used for debugging. Defaults to NULL. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably to be checked with the
/// PTSUCCESS/PTFAILED macros like the Create*Buffer methods -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
/// <summary> true if this buffer is going to be bound to a device-side
/// scalar variable. </summary>
BOOL m_bScalarBinding;
};
};
#endif
#endif

Просмотреть файл

@ -1,437 +0,0 @@
//--------------------------------------------------------------------------------------
// File: pcubuffer.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _PCUBUFFER_H_
#define _PCUBUFFER_H_
#include <stdio.h>
#include <crtdbg.h>
#include <cuda.h>
#include "pbuffer.h"
#include "ptaskutils.h"
#include <map>
namespace PTask {
class CUAccelerator;
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific buffer for CUDA. Derives publicly from PBuffer.
/// Everything declared after the 'protected:' label below, including
/// the data members, is protected.
/// </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
class PCUBuffer :
public PBuffer
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <param name="pParent"> [in,out] If non-null, the parent. </param>
/// <param name="bufferAccessFlags"> The buffer access flags. </param>
/// <param name="nChannelIndex"> Zero-based index of the channel. </param>
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator.
/// Defaults to NULL. </param>
/// <param name="pAllocatorAccelerator"> (optional) [in,out] If non-null, the allocator
/// accelerator. Defaults to NULL. </param>
/// <param name="uiUID"> (optional) the uid. Defaults to the value
/// returned by ptaskutils::nextuid(). </param>
///-------------------------------------------------------------------------------------------------
PCUBuffer(Datablock * pParent,
BUFFERACCESSFLAGS bufferAccessFlags,
UINT nChannelIndex,
Accelerator * pAccelerator=NULL,
Accelerator * pAllocatorAccelerator=NULL,
UINT uiUID=ptaskutils::nextuid()
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~PCUBuffer(void);
///-------------------------------------------------------------------------------------------------
/// <summary> Force synchronize. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ForceSynchronize();
///-------------------------------------------------------------------------------------------------
/// <summary> Device to device transfer. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
DeviceToDeviceTransfer(
__inout PBuffer * pDstBuffer,
__in AsyncContext * pAsyncContext
);
///-------------------------------------------------------------------------------------------------
/// <summary> Device memcpy of an explicit number of bytes. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
/// <param name="pSrcBuffer"> [in,out] If non-null, the buffer holding the source data. </param>
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="uiCopyBytes"> [in] The number of bytes to copy. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
Copy(
__inout PBuffer * pDstBuffer,
__inout PBuffer * pSrcBuffer,
__in AsyncContext * pAsyncContext,
__in UINT uiCopyBytes
);
///-------------------------------------------------------------------------------------------------
/// <summary> Device memcpy. This overload takes no byte count. NOTE(review):
/// how the copy size is chosen is not visible from this header --
/// confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
/// <param name="pSrcBuffer"> [in,out] If non-null, the buffer holding the source data. </param>
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
Copy(
__inout PBuffer * pDstBuffer,
__inout PBuffer * pSrcBuffer,
__in AsyncContext * pAsyncContext
);
///-------------------------------------------------------------------------------------------------
/// <summary> return true if the derived class supports a memset API. </summary>
///
/// <remarks> crossbac, 8/14/2013. </remarks>
///
/// <returns> true if a memset API is supported, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsMemset();
///-------------------------------------------------------------------------------------------------
/// <summary> memset. </summary>
///
/// <remarks> crossbac, 8/14/2013. </remarks>
///
/// <param name="nValue"> [in] The value. </param>
/// <param name="szExtentBytes"> (optional) [in] The extent in bytes. Defaults to 0.
/// NOTE(review): the meaning of 0 is not visible from
/// this header -- confirm. </param>
///
/// <returns> the number of bytes set </returns>
///-------------------------------------------------------------------------------------------------
virtual size_t
FillExtent(
__in int nValue,
__in size_t szExtentBytes=0
);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize host view. </summary>
///
/// <remarks> crossbac, 1/11/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="pBuffer"> [in] The host memory extent. NOTE(review): presumably
/// the host-side destination of the data -- confirm. </param>
/// <param name="bForceSynchronous"> [in] Force-synchronous flag. NOTE(review): presumably
/// TRUE makes the operation synchronous -- confirm. </param>
/// <param name="bRequestOutstanding"> [out] The request-outstanding flag. NOTE(review):
/// presumably TRUE when an asynchronous request is still
/// in flight on return -- confirm. </param>
///
/// <returns> NOTE(review): the meaning of the UINT result is not documented -- TODO. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateHostView(
__in AsyncContext * pAsyncContext,
__in HOSTMEMORYEXTENT * pBuffer,
__in BOOL bForceSynchronous,
__out BOOL &bRequestOutstanding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize mutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
/// <param name="bOutstanding"> [out] The outstanding flag. NOTE(review): presumably
/// TRUE when an asynchronous request is still in flight
/// on return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. </param>
/// <param name="lpszBinding"> [in] The binding. </param>
///
/// <returns> NOTE(review): the meaning of the UINT result is not documented -- TODO. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateMutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize immutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
/// <param name="bOutstanding"> [out] The outstanding flag. NOTE(review): presumably
/// TRUE when an asynchronous request is still in flight
/// on return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. </param>
/// <param name="lpszBinding"> [in] The binding. </param>
///
/// <returns> NOTE(review): the meaning of the UINT result is not documented -- TODO. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateImmutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
/// resources (not in constant memory).
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer
/// contents. </param>
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
/// buffer which will be used to label runtime-specific
/// objects to aid in debugging. Ignored on release
/// builds. Defaults to NULL. </param>
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
/// addressable. Defaults to true. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateMutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
/// resources (i.e. those in constant memory).
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer
/// contents. </param>
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
/// buffer which will be used to label runtime-specific
/// objects to aid in debugging. Ignored on release
/// builds. Defaults to NULL. </param>
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
/// addressable. Defaults to true. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateImmutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates readable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> (optional) [in] If non-null, a string used to label the object
/// that can be used for debugging. Defaults to NULL. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably to be checked with the
/// PTSUCCESS/PTFAILED macros like the Create*Buffer methods -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates writable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> (optional) [in] If non-null, a string used to label the object
/// that can be used for debugging. Defaults to NULL. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably to be checked with the
/// PTSUCCESS/PTFAILED macros like the Create*Buffer methods -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> (optional) [in] If non-null, a string used to label the object
/// that can be used for debugging. Defaults to NULL. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably to be checked with the
/// PTSUCCESS/PTFAILED macros like the Create*Buffer methods -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if a device-side view of this data can be materialized
/// using memset APIs rather than memcpy APIs. </summary>
///
/// <remarks> crossbac, 7/10/2012. </remarks>
///
/// <param name="uiBufferBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
/// <param name="uiInitialDataBytes"> [in] The size of the initial data in bytes. </param>
///
/// <returns> true if device view memsettable, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
IsDeviceViewMemsettable(
__in UINT uiBufferBytes,
__in void * pInitialData,
__in UINT uiInitialDataBytes
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a device memset stride. </summary>
///
/// <remarks> crossbac, 7/10/2012. </remarks>
///
/// <param name="uiBufferBytes"> [in] The buffer size in bytes. </param>
/// <param name="uiInitialDataBytes"> [in] The size of the initial data in bytes. </param>
///
/// <returns> The device memset stride. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetDeviceMemsetStride(
__in UINT uiBufferBytes,
__in UINT uiInitialDataBytes
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a device memset count. </summary>
///
/// <remarks> crossbac, 7/10/2012. </remarks>
///
/// <param name="uiBufferBytes"> [in] The buffer size in bytes. </param>
/// <param name="uiInitialDataBytes"> [in] The size of the initial data in bytes. </param>
///
/// <returns> The device memset count. </returns>
///-------------------------------------------------------------------------------------------------
UINT
GetDeviceMemsetCount(
__in UINT uiBufferBytes,
__in UINT uiInitialDataBytes
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a device memset value. </summary>
///
/// <remarks> crossbac, 7/10/2012. </remarks>
///
/// <param name="pInitialValue"> [in] Pointer to the initial value. </param>
///
/// <returns> The device memset value. NOTE(review): how the returned VOID* encodes
/// the value is not visible from this header -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
VOID *
GetDeviceMemsetValue(
__in void * pInitialValue
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the CUDA stream. </summary>
///
/// <remarks> crossbac, 7/10/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context. </param>
///
/// <returns> The stream. </returns>
///-------------------------------------------------------------------------------------------------
CUstream GetStream(AsyncContext * pAsyncContext);
/// <summary> The platform-specific accelerator. For convenience--we could typecast
/// m_pAccelerator inherited from the super-class every time we want one,
/// but it's ugly, and happens a lot. </summary>
CUAccelerator * m_pPSAcc;
/// <summary> Buffer for page locked allocations. Asynchronous transfers in CUDA
/// require the host-side to be page-locked. When we create a buffer that
/// requires asynchronous transfers, we will page-lock the initial data if it is
/// provided, remembering to un-pin it at delete time, or allocate a page-locked
/// buffer if it is not provided. </summary>
void * m_pPageLockedBuffer;
/// <summary> true if the page locked buffer is owned by this object, and must
/// therefore be freed (instead of un-pinned) at deletion time. </summary>
BOOL m_bPageLockedBufferOwned;
/// <summary> true if the device buffer was created using cuMemAlloc and we
/// are responsible for freeing it. If the device buffer was
/// created by finding the device-side mapping for a page-locked
/// buffer, then it shares the fate of the page-locked buffer
/// and we must be careful not to free it. </summary>
BOOL m_bDeviceBufferOwned;
};
};
#endif

Просмотреть файл

@ -1,335 +0,0 @@
//--------------------------------------------------------------------------------------
// File: pdxbuffer.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _PDXBUFFER_H_
#define _PDXBUFFER_H_
#include <stdio.h>
#include <crtdbg.h>
#include "ptdxhdr.h"
#include "pbuffer.h"
#include "ptaskutils.h"
#include <map>
namespace PTask {
class PDXBuffer :
public PBuffer
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pParentDatablock"> [in,out] If non-null, the parent datablock. </param>
/// <param name="bufferAccessFlags"> The buffer access flags. </param>
/// <param name="nChannelIndex"> Zero-based index of the datablock channel this
/// PBuffer is backing. </param>
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
/// <param name="pAllocatingAccelerator"> (optional) [in,out] If non-null, the allocating
/// accelerator. </param>
/// <param name="uiUniqueIdentifier"> (optional) unique identifier. </param>
///-------------------------------------------------------------------------------------------------
PDXBuffer(Datablock * pParent,
BUFFERACCESSFLAGS f,
UINT nChannelIndex,
Accelerator * p=NULL,
Accelerator * pAllocator=NULL,
UINT uiUID=ptaskutils::nextuid()
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~PDXBuffer(void);
///-------------------------------------------------------------------------------------------------
/// <summary> Force synchronize. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ForceSynchronize();
///-------------------------------------------------------------------------------------------------
/// <summary> Complete any outstanding ops. </summary>
///
/// <remarks> Crossbac, 3/12/2014. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL CompleteOutstandingOps();
///-------------------------------------------------------------------------------------------------
/// <summary> Check for any outstanding ops on this buffer. </summary>
///
/// <remarks> Crossbac, 3/12/2014. </remarks>
///
/// <returns> NOTE(review): presumably TRUE when operations are still outstanding and FALSE
/// otherwise (a query result rather than a success/failure status) -- confirm
/// against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
BOOL HasOutstandingOps();
///-------------------------------------------------------------------------------------------------
/// <summary> Device to device transfer.
/// NOTE(review): presumably copies from this buffer into pDstBuffer (inferred from
/// the parameter name) -- confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pDstBuffer"> [in,out] The destination PBuffer. </param>
/// <param name="pAsyncContext"> [in] If non-null, the async context for the transfer. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
DeviceToDeviceTransfer(
__inout PBuffer * pDstBuffer,
__in AsyncContext * pAsyncContext
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific acquire of this buffer's synchronization object.
/// NOTE(review): presumably acquires m_pDXGIKeyedMutex with the given key --
/// confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 3/14/2014. </remarks>
///
/// <param name="uiAcquireKey"> [in] The acquire key. </param>
///
/// <returns> A PDXBuffer pointer. NOTE(review): presumably this buffer on success --
/// confirm, including what is returned on failure. </returns>
///-------------------------------------------------------------------------------------------------
PDXBuffer *
PlatformSpecificAcquireSync(
__in UINT64 uiAcquireKey
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific release of this buffer's synchronization object.
/// NOTE(review): presumably releases m_pDXGIKeyedMutex with the given key --
/// confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 3/14/2014. </remarks>
///
/// <param name="uiReleaseKey"> [in] The release key. </param>
///-------------------------------------------------------------------------------------------------
void
PlatformSpecificReleaseSync(
__in UINT64 uiReleaseKey
);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize host view.
/// NOTE(review): presumably fills the host memory extent pBuffer from this
/// buffer (direction inferred from the name) -- confirm. </summary>
///
/// <remarks> crossbac, 1/11/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the operation. </param>
/// <param name="pBuffer"> [in] The host memory extent for the host view. </param>
/// <param name="bForceSynchronous"> [in] Whether to force the operation to be synchronous.
/// Required argument: no default is declared here. </param>
/// <param name="bRequestOutstanding"> [out] Receives the request-outstanding flag.
/// NOTE(review): presumably TRUE when work is still in
/// flight on return -- confirm. </param>
///
/// <returns> UINT result. NOTE(review): its meaning is not documented in this header
/// (possibly a byte count) -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateHostView(
__in AsyncContext * pAsyncContext,
__in HOSTMEMORYEXTENT * pBuffer,
__in BOOL bForceSynchronous,
__out BOOL &bRequestOutstanding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize mutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the host memory extent holding the
/// initial data. </param>
/// <param name="bOutstanding"> [out] Receives the outstanding flag. NOTE(review):
/// presumably TRUE when work is still in flight on
/// return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. Required argument: no
/// default is declared here. </param>
/// <param name="lpszBinding"> [in] The binding string. Required argument: no default
/// is declared here. </param>
///
/// <returns> UINT result. NOTE(review): its meaning is not documented in this header
/// (possibly a byte count) -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateMutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize immutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the host memory extent holding the
/// initial data. </param>
/// <param name="bOutstanding"> [out] Receives the outstanding flag. NOTE(review):
/// presumably TRUE when work is still in flight on
/// return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. Required argument: no
/// default is declared here. </param>
/// <param name="lpszBinding"> [in] The binding string. Required argument: no default
/// is declared here. </param>
///
/// <returns> UINT result. NOTE(review): its meaning is not documented in this header
/// (possibly a byte count) -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateImmutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
/// resources (not in constant memory).
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the
/// operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer contents. </param>
/// <param name="strDebugBufferName"> [in] (optional, default NULL) If non-null, a name to
/// assign to the buffer which will be used to label
/// runtime-specific objects to aid in debugging. Ignored
/// on release builds. </param>
/// <param name="bByteAddressable"> [in] (optional, default true) true if the buffer
/// should be byte addressable. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateMutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
/// resources (i.e. those in constant memory).
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the
/// operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer contents. </param>
/// <param name="strDebugBufferName"> [in] (optional, default NULL) If non-null, a name to
/// assign to the buffer which will be used to label
/// runtime-specific objects to aid in debugging. Ignored
/// on release builds. </param>
/// <param name="bByteAddressable"> [in] (optional, default true) true if the buffer
/// should be byte addressable. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateImmutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates readable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> [in] (optional, default NULL) If non-null, a string used to label the
/// object that can be used for debugging. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably checked with the PTSUCCESS/PTFAILED
/// macros like the other PTRESULT-returning members -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates writable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> [in] (optional, default NULL) If non-null, a string used to label the
/// object that can be used for debugging. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably checked with the PTSUCCESS/PTFAILED
/// macros like the other PTRESULT-returning members -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> [in] (optional, default NULL) If non-null, a string used to label the
/// object that can be used for debugging. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably checked with the PTSUCCESS/PTFAILED
/// macros like the other PTRESULT-returning members -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates the staging buffer for this PDXBuffer on the given device.
/// NOTE(review): presumably allocates m_pStageBuffer, which (judging by the
/// m_bHtoDStagePopulated / m_bDtoHStagePopulated flags) appears to serve both
/// transfer directions -- confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 3/11/2014.
/// Declared without a "PDXBuffer::" qualifier: a qualified name on an in-class
/// member declaration is ill-formed in standard C++ and is accepted only as a
/// compiler extension (GCC rejects it as "extra qualification"). </remarks>
///
/// <param name="pDevice"> [in] The D3D11 device. </param>
///
/// <returns> HRESULT status code (test with SUCCEEDED/FAILED). </returns>
///-------------------------------------------------------------------------------------------------
HRESULT
CreateStagingBuffer(
__in ID3D11Device * pDevice
);
HANDLE m_hDXGIHandle;
IDXGIKeyedMutex * m_pDXGIKeyedMutex;
IDXGIResource * m_pDXGIResource;
ID3D11Query * m_pOutstandingQuery;
ID3D11Buffer * m_pStageBuffer;
ID3D11Buffer * m_pOutstandingOpBuffer;
HOSTMEMORYEXTENT * m_pOutstandingHtoDTarget;
HOSTMEMORYEXTENT * m_pOutstandingDtoHTarget;
BOOL m_bHtoDStagePopulated;
BOOL m_bDtoHStagePopulated;
BOOL m_bP2PShareable;
BOOL m_bP2PLocked;
};
};
#endif

Просмотреть файл

@ -1,360 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: PDXTextureBuffer.h
//
// summary: Implements a PBuffer subclass over the DirectX backend that uses
// ID3D11Texture* objects instead of ID3D11Buffer objects. The goal of the
// implementation was to enable cross-GPU sharing of resources through
// DX APIs, in hopes of avoiding the device-sync that is currently required
// by any GPU-host copyback. The APIs in question work only on Texture2D
// objects with no mip-maps, so I wrote a version that backs PBuffers with those
// instead. Unfortunately, the sharing APIs *still* didn't work. Moreover, you can't
// bind textures to compute shaders, so the whole thing wound up being a dead
// end. Enough code was involved that it seemed worth preserving despite its
// out-of-the-box obsolescence.
//
///-------------------------------------------------------------------------------------------------
#ifndef _PDXTEXTUREBUFFER_H_
#define _PDXTEXTUREBUFFER_H_
#include <stdio.h>
#include <crtdbg.h>
#include "ptdxhdr.h"
#include "pbuffer.h"
#include "ptaskutils.h"
#include <map>
namespace PTask {
class PDXTextureBuffer :
public PBuffer
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pParent"> [in] If non-null, the parent datablock. </param>
/// <param name="f"> [in] The buffer access flags. </param>
/// <param name="nChannelIndex"> [in] Zero-based index of the datablock channel this
/// PBuffer is backing. </param>
/// <param name="p"> [in] (optional, default NULL) If non-null, the accelerator. </param>
/// <param name="pAllocator"> [in] (optional, default NULL) If non-null, the allocating
/// accelerator. </param>
/// <param name="uiUID"> [in] (optional) Unique identifier; defaults to a fresh value
/// from ptaskutils::nextuid(). </param>
///-------------------------------------------------------------------------------------------------
PDXTextureBuffer(
__in Datablock * pParent,
__in BUFFERACCESSFLAGS f,
__in UINT nChannelIndex,
__in Accelerator * p=NULL,
__in Accelerator * pAllocator=NULL,
__in UINT uiUID=ptaskutils::nextuid()
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Declared virtual so that deleting the object through a PBuffer
/// pointer runs this class's cleanup. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~PDXTextureBuffer(void);
///-------------------------------------------------------------------------------------------------
/// <summary> Force synchronize.
/// NOTE(review): what is synchronized (and whether the call blocks) is defined by
/// the implementation, which is not part of this header -- confirm before
/// relying on it. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ForceSynchronize();
///-------------------------------------------------------------------------------------------------
/// <summary> Complete any outstanding ops on this buffer.
/// NOTE(review): which operations count as outstanding is defined by the
/// implementation, not by this header -- confirm. </summary>
///
/// <remarks> Crossbac, 3/12/2014. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL CompleteOutstandingOps();
///-------------------------------------------------------------------------------------------------
/// <summary> Check for any outstanding ops on this buffer. </summary>
///
/// <remarks> Crossbac, 3/12/2014. </remarks>
///
/// <returns> NOTE(review): presumably TRUE when operations are still outstanding and FALSE
/// otherwise (a query result rather than a success/failure status) -- confirm
/// against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
BOOL HasOutstandingOps();
///-------------------------------------------------------------------------------------------------
/// <summary> Device to device transfer.
/// NOTE(review): presumably copies from this buffer into pDstBuffer (inferred from
/// the parameter name) -- confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pDstBuffer"> [in,out] The destination PBuffer. </param>
/// <param name="pAsyncContext"> [in] If non-null, the async context for the transfer. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
DeviceToDeviceTransfer(
__inout PBuffer * pDstBuffer,
__in AsyncContext * pAsyncContext
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific acquire of this buffer's synchronization object.
/// NOTE(review): presumably acquires m_pDXGIKeyedMutex with the given key --
/// confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 3/14/2014. </remarks>
///
/// <param name="uiAcquireKey"> [in] The acquire key. </param>
///
/// <returns> A PDXTextureBuffer pointer. NOTE(review): presumably this buffer on success --
/// confirm, including what is returned on failure. </returns>
///-------------------------------------------------------------------------------------------------
PDXTextureBuffer *
PlatformSpecificAcquireSync(
__in UINT64 uiAcquireKey
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific release of this buffer's synchronization object.
/// NOTE(review): presumably releases m_pDXGIKeyedMutex with the given key --
/// confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 3/14/2014. </remarks>
///
/// <param name="uiReleaseKey"> [in] The release key. </param>
///-------------------------------------------------------------------------------------------------
void
PlatformSpecificReleaseSync(
__in UINT64 uiReleaseKey
);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize host view.
/// NOTE(review): presumably fills the host memory extent pBuffer from this
/// buffer (direction inferred from the name) -- confirm. </summary>
///
/// <remarks> crossbac, 1/11/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the operation. </param>
/// <param name="pBuffer"> [in] The host memory extent for the host view. </param>
/// <param name="bForceSynchronous"> [in] Whether to force the operation to be synchronous.
/// Required argument: no default is declared here. </param>
/// <param name="bRequestOutstanding"> [out] Receives the request-outstanding flag.
/// NOTE(review): presumably TRUE when work is still in
/// flight on return -- confirm. </param>
///
/// <returns> UINT result. NOTE(review): its meaning is not documented in this header
/// (possibly a byte count) -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateHostView(
__in AsyncContext * pAsyncContext,
__in HOSTMEMORYEXTENT * pBuffer,
__in BOOL bForceSynchronous,
__out BOOL &bRequestOutstanding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize mutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the host memory extent holding the
/// initial data. </param>
/// <param name="bOutstanding"> [out] Receives the outstanding flag. NOTE(review):
/// presumably TRUE when work is still in flight on
/// return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. Required argument: no
/// default is declared here. </param>
/// <param name="lpszBinding"> [in] The binding string. Required argument: no default
/// is declared here. </param>
///
/// <returns> UINT result. NOTE(review): its meaning is not documented in this header
/// (possibly a byte count) -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateMutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize immutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the host memory extent holding the
/// initial data. </param>
/// <param name="bOutstanding"> [out] Receives the outstanding flag. NOTE(review):
/// presumably TRUE when work is still in flight on
/// return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. Required argument: no
/// default is declared here. </param>
/// <param name="lpszBinding"> [in] The binding string. Required argument: no default
/// is declared here. </param>
///
/// <returns> UINT result. NOTE(review): its meaning is not documented in this header
/// (possibly a byte count) -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateImmutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
/// resources (not in constant memory).
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the
/// operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer contents. </param>
/// <param name="strDebugBufferName"> [in] (optional, default NULL) If non-null, a name to
/// assign to the buffer which will be used to label
/// runtime-specific objects to aid in debugging. Ignored
/// on release builds. </param>
/// <param name="bByteAddressable"> [in] (optional, default true) true if the buffer
/// should be byte addressable. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateMutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
/// resources (i.e. those in constant memory).
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the
/// operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer contents. </param>
/// <param name="strDebugBufferName"> [in] (optional, default NULL) If non-null, a name to
/// assign to the buffer which will be used to label
/// runtime-specific objects to aid in debugging. Ignored
/// on release builds. </param>
/// <param name="bByteAddressable"> [in] (optional, default true) true if the buffer
/// should be byte addressable. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateImmutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates readable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> [in] (optional, default NULL) If non-null, a string used to label the
/// object that can be used for debugging. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably checked with the PTSUCCESS/PTFAILED
/// macros like the other PTRESULT-returning members -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates writable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> [in] (optional, default NULL) If non-null, a string used to label the
/// object that can be used for debugging. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably checked with the PTSUCCESS/PTFAILED
/// macros like the other PTRESULT-returning members -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> [in] (optional, default NULL) If non-null, a string used to label the
/// object that can be used for debugging. </param>
///
/// <returns> PTRESULT. NOTE(review): presumably checked with the PTSUCCESS/PTFAILED
/// macros like the other PTRESULT-returning members -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates host to device stage buffer.
/// NOTE(review): presumably allocates m_pHtoDStageBuffer on pDevice -- confirm
/// against the implementation. </summary>
///
/// <remarks> Crossbac, 3/11/2014. </remarks>
///
/// <param name="pDevice"> [in] The D3D11 device. </param>
///
/// <returns> HRESULT status code (test with SUCCEEDED/FAILED). </returns>
///-------------------------------------------------------------------------------------------------
HRESULT
CreateHtoDStageBuffer(
__in ID3D11Device * pDevice
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates device to host staging buffer.
/// NOTE(review): presumably allocates m_pDtoHStageBuffer on pDevice -- confirm
/// against the implementation. </summary>
///
/// <remarks> Crossbac, 3/11/2014. </remarks>
///
/// <param name="pDevice"> [in] The D3D11 device. </param>
///
/// <returns> HRESULT status code (test with SUCCEEDED/FAILED). </returns>
///-------------------------------------------------------------------------------------------------
HRESULT
CreateDtoHStageBuffer(
__in ID3D11Device * pDevice
);
HANDLE m_hDXGIHandle;
IDXGIKeyedMutex * m_pDXGIKeyedMutex;
IDXGIResource * m_pDXGIResource;
ID3D11Resource * m_pDtoHStageBuffer;
ID3D11Resource * m_pHtoDStageBuffer;
BOOL m_bHtoDStagePopulated;
BOOL m_bDtoHStagePopulated;
BOOL m_bP2PShareable;
BOOL m_bP2PLocked;
};
};
#endif

Просмотреть файл

@ -1,335 +0,0 @@
//--------------------------------------------------------------------------------------
// File: phbuffer.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _PHBUFFER_H_
#define _PHBUFFER_H_
#include <stdio.h>
#include <crtdbg.h>
#include "pbuffer.h"
#include "ptaskutils.h"
#include <map>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Device specific buffer representing host memory. This should be a wrapper around
/// a simple buffer created with malloc (or potentially run-time specific allocator
/// from another platform). The essential idea is that in a given Datablock's buffer
/// map, the PHBuffer entry should always be the place to look for a host-accessible
/// buffer. When an up-to-date one is not available, then we start materializing
/// views from other memory spaces.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class PHBuffer :
public PBuffer
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pParent"> If non-null, the parent datablock. </param>
/// <param name="accessFlags"> The buffer access flags. </param>
/// <param name="nChannelIndex"> Zero-based channel index. NOTE(review): presumably the
/// index of the datablock channel this PBuffer backs --
/// confirm. </param>
/// <param name="pAccelerator"> (optional, default NULL) If non-null, the accelerator. </param>
/// <param name="pAllocAccelerator"> (optional, default NULL) If non-null, the allocating
/// accelerator. </param>
/// <param name="uiUID"> (optional) Unique identifier; defaults to a fresh value
/// from ptaskutils::nextuid(). </param>
///-------------------------------------------------------------------------------------------------
PHBuffer(Datablock * pParent,
BUFFERACCESSFLAGS accessFlags,
UINT nChannelIndex,
Accelerator * pAccelerator=NULL,
Accelerator * pAllocAccelerator=NULL,
UINT uiUID=ptaskutils::nextuid()
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Declared virtual so that deleting the object through a PBuffer
/// pointer runs this class's cleanup. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~PHBuffer(void);
///-------------------------------------------------------------------------------------------------
/// <summary> Force synchronize.
/// NOTE(review): what is synchronized (and whether the call blocks) is defined by
/// the implementation, which is not part of this header -- confirm before
/// relying on it. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ForceSynchronize();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize host view.
/// NOTE(review): presumably fills the host memory extent pBuffer from this
/// buffer (direction inferred from the name) -- confirm. </summary>
///
/// <remarks> crossbac, 1/11/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the operation. </param>
/// <param name="pBuffer"> [in] The host memory extent for the host view. </param>
/// <param name="bForceSynchronous"> [in] Whether to force the operation to be synchronous.
/// Required argument: no default is declared here. </param>
/// <param name="bRequestOutstanding"> [out] Receives the request-outstanding flag.
/// NOTE(review): presumably TRUE when work is still in
/// flight on return -- confirm. </param>
///
/// <returns> UINT result. NOTE(review): its meaning is not documented in this header
/// (possibly a byte count) -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateHostView(
__in AsyncContext * pAsyncContext,
__in HOSTMEMORYEXTENT * pBuffer,
__in BOOL bForceSynchronous,
__out BOOL &bRequestOutstanding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize mutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the host memory extent holding the
/// initial data. </param>
/// <param name="bOutstanding"> [out] Receives the outstanding flag. NOTE(review):
/// presumably TRUE when work is still in flight on
/// return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. Required argument: no
/// default is declared here. </param>
/// <param name="lpszBinding"> [in] The binding string. Required argument: no default
/// is declared here. </param>
///
/// <returns> UINT result. NOTE(review): its meaning is not documented in this header
/// (possibly a byte count) -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateMutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Materialize immutable accelerator view. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialData"> [in] If non-null, the host memory extent holding the
/// initial data. </param>
/// <param name="bOutstanding"> [out] Receives the outstanding flag. NOTE(review):
/// presumably TRUE when work is still in flight on
/// return -- confirm. </param>
/// <param name="pModule"> [in] If non-null, the module. Required argument: no
/// default is declared here. </param>
/// <param name="lpszBinding"> [in] The binding string. Required argument: no default
/// is declared here. </param>
///
/// <returns> UINT result. NOTE(review): its meaning is not documented in this header
/// (possibly a byte count) -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
__populateImmutableAcceleratorView(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialData,
__out BOOL& bOutstanding,
__in void * pModule,
__in const char * lpszBinding
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
/// resources (not in constant memory).
/// NOTE(review): this wording matches the device-backed PBuffer subclasses; for
/// PHBuffer the buffer presumably lives in host memory -- confirm.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the
/// operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer contents. </param>
/// <param name="strDebugBufferName"> [in] (optional, default NULL) If non-null, a name to
/// assign to the buffer which will be used to label
/// runtime-specific objects to aid in debugging. Ignored
/// on release builds. </param>
/// <param name="bByteAddressable"> [in] (optional, default true) true if the buffer
/// should be byte addressable. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateMutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
/// resources (i.e. those in constant memory).
/// NOTE(review): this wording matches the device-backed PBuffer subclasses; for
/// PHBuffer the buffer presumably lives in host memory -- confirm.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the async context for the
/// operation. </param>
/// <param name="uiBufferSizeBytes"> [in] The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer contents. </param>
/// <param name="strDebugBufferName"> [in] (optional, default NULL) If non-null, a name to
/// assign to the buffer which will be used to label
/// runtime-specific objects to aid in debugging. Ignored
/// on release builds. </param>
/// <param name="bByteAddressable"> [in] (optional, default true) true if the buffer
/// should be byte addressable. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT
CreateImmutableBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName=NULL,
__in bool bByteAddressable=true
);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates readable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> (optional) [in] If non-null, a string used to label the object,
/// which can be used for debugging. Defaults to NULL. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates writable bindable objects if the access flags indicate they will be
/// required at dispatch time.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> (optional) [in] If non-null, a string used to label the object,
/// which can be used for debugging. Defaults to NULL. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
///
/// <remarks> Crossbac, 1/4/2012. </remarks>
///
/// <param name="szname"> (optional) [in] If non-null, a string used to label the object,
/// which can be used for debugging. Defaults to NULL. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if the derived class supports a memset API. </summary>
///
/// <remarks> crossbac, 8/14/2013. </remarks>
///
/// <returns> true if a memset API is supported, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsMemset();
///-------------------------------------------------------------------------------------------------
/// <summary> memset-style fill of this buffer with a given value. </summary>
///
/// <remarks> crossbac, 8/14/2013.
/// NOTE(review): the meaning of the default szExtentBytes=0 is not visible in this
/// header (presumably "fill the whole buffer") - confirm against the implementation.
/// </remarks>
///
/// <param name="nValue"> The value to set. </param>
/// <param name="szExtentBytes"> (optional) The extent in bytes. Defaults to 0. </param>
///
/// <returns> the number of bytes set </returns>
///-------------------------------------------------------------------------------------------------
virtual size_t
FillExtent(
__in int nValue,
__in size_t szExtentBytes=0
);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes a host buffer. Since most of the views created by required overrides
/// in PBuffer are meaningless in host memory (e.g. immutability)
/// we provide one routine to create buffers, and map all the required overrides to
/// it.
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. Unlike the public Create*Buffer overrides, this
/// declaration has no default arguments: every parameter must be passed. </remarks>
///
/// <param name="pAsyncContext"> [in,out] If non-null, the asynchronous context. </param>
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
/// <param name="pInitialBufferContents"> [in] If non-null, the initial buffer
/// contents. </param>
/// <param name="strDebugBufferName"> [in] If non-null, a name to assign to the
/// buffer which will be used to label runtime-specific
/// objects to aid in debugging. Ignored on release
/// builds. </param>
/// <param name="bByteAddressable"> true if the buffer should be byte
/// addressable. </param>
///
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
///-------------------------------------------------------------------------------------------------
PTRESULT
InitializeBuffer(
__in AsyncContext * pAsyncContext,
__in UINT uiBufferSizeBytes,
__in HOSTMEMORYEXTENT * pInitialBufferContents,
__in char * strDebugBufferName,
__in bool bByteAddressable
);
///-------------------------------------------------------------------------------------------------
/// <summary> Finalize the dimensions of the device buffer that will be created to back this
/// PHBuffer. We specialize the host buffer implementation to not
/// require the block to be sealed to allocate buffers!
/// </summary>
///
/// <remarks> Crossbac, 1/4/2012. Neither parameter has a default value in this
/// declaration. </remarks>
///
/// <param name="bByteAddressable"> [out] true if the buffer should be byte
/// addressable. </param>
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
///
/// <returns> A UINT; presumably the finalized buffer size in bytes - TODO confirm against
/// the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
FinalizeDimensions(
__out bool &bByteAddressable,
__in UINT uiBufferSizeBytes
);
};
};
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,165 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: Partitioner.h
//
// summary: Declares the partitioner class
///-------------------------------------------------------------------------------------------------
#ifndef __PARTITIONER_H__
#define __PARTITIONER_H__
#include <Windows.h>
#include <iostream>
#include <sstream>
#include <stdio.h>
#include <crtdbg.h>
#include "primitive_types.h"
namespace PTask {
class Graph;
///-------------------------------------------------------------------------------------------------
/// <summary> Graph partitioner class. Based on Renato et al.'s optimal cut partitioner.
///
/// Currently, calls out to a .exe. In the future will use a DLL-based version directly.
/// Work preparing for the DLL-based version is currently guarded by
/// #ifdef USE_GRAPH_PARTITIONER_DLL
/// </summary>
///
/// <remarks> Crossbac, 12/10/2013. </remarks>
///-------------------------------------------------------------------------------------------------
class Partitioner {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> jcurrey, 2/1/2014. </remarks>
///
/// <param name="graph"> [in] The ptask graph to partition. </param>
/// <param name="numPartitions"> (optional) The number of partitions to divide the graph
/// into. Defaults to 2. </param>
/// <param name="workingDir"> (optional) The directory in which files related to the
/// execution of the partitioner will be written.
/// Defaults to NULL. </param>
/// <param name="fileNamePrefix"> (optional) The prefix of the names of the files which
/// will be written. Defaults to NULL. </param>
///-------------------------------------------------------------------------------------------------
Partitioner(
Graph * graph,
int numPartitions = 2,
const char * workingDir = NULL,
const char * fileNamePrefix = NULL
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> jcurrey 2/1/2014. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~Partitioner();
///-------------------------------------------------------------------------------------------------
/// <summary> Partition the ptask graph into nPartition. If successful, return true.
///
/// Currently only 2 partitions are supported.
/// </summary>
///
/// <remarks> jcurrey, 2/1/2014. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL Partition();
protected:
friend class Graph;
/// <summary> The input ptask graph being partitioned. </summary>
Graph * m_graph;
/// <summary> The number of partitions to divide the graph into. </summary>
int m_numPartitions;
/// <summary> The directory in which files related to the execution of the partitioner will be written. </summary>
std::string m_workingDir;
/// <summary> The prefix of the names of the files which will be written. </summary>
std::string m_fileNamePrefix;
///-------------------------------------------------------------------------------------------------
/// <summary> Read the partitioner's solution from a file into an array. </summary>
///
/// <remarks> jcurrey, 2/1/2014. </remarks>
///
/// <param name="fileName"> [in] Name of the solution file to read. </param>
/// <param name="expectedNumValues"> The number of values expected in the file. </param>
/// <param name="values"> [out] The array that receives the values. </param>
///
/// <returns> A BOOL; presumably true on success - TODO confirm against the
/// implementation. </returns>
///-------------------------------------------------------------------------------------------------
BOOL ReadSolutionFile(
const char * fileName,
int expectedNumValues,
int * values
);
#ifdef USE_GRAPH_PARTITIONER_DLL
// The destructor is declared exactly once, in the public section above. Declaring it a second
// time in this guarded section is an ill-formed member redeclaration, so the class could not
// compile whenever USE_GRAPH_PARTITIONER_DLL was defined.
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor taking only the graph to partition. </summary>
///
/// <remarks> Crossbac, 12/10/2013.
/// NOTE(review): a call passing a single Graph* is ambiguous between this overload and
/// the public constructor, whose remaining parameters are all defaulted. Resolve this
/// before enabling USE_GRAPH_PARTITIONER_DLL. </remarks>
///
/// <param name="pGraph"> [in] The ptask graph to partition. </param>
///-------------------------------------------------------------------------------------------------
Partitioner(Graph * pGraph);
///-------------------------------------------------------------------------------------------------
/// <summary> Partition the ptask graph into nPartition. If successful, return true, and set
/// nSolutionValue and nSolutionEvaluation, which are (somewhat obscure)
/// metrics of the quality of the solution.
/// </summary>
///
/// <remarks> Crossbac, 12/10/2013. </remarks>
///
/// <param name="nPartitions"> The number of partitions. </param>
/// <param name="nSolutionValue"> [out] The solution value. </param>
/// <param name="nSolutionEvaluation"> [out] The solution evaluation. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL Partition(int nPartitions, int& nSolutionValue, int& nSolutionEvaluation);
///-------------------------------------------------------------------------------------------------
/// <summary> Assign the partition created by a successful call to Partition to the
/// underlying PTask graph. </summary>
///
/// <remarks> Crossbac, 12/10/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL AssignPartition();
protected:
/// <summary> The input ptask graph being partitioned. </summary>
Graph * m_pGraph;
/// <summary> The solution: an integer-valued partition id per node in m_pGraph </summary>
int * m_pSolution;
/// <summary> true if the operation was a success, false if it failed. </summary>
BOOL m_bSolutionValid;
/// <summary> The solution value. </summary>
int m_nSolutionValue;
/// <summary> The solution evaluation. </summary>
int m_nSolutionEvaluation;
friend class Graph;
#endif // USE_GRAPH_PARTITIONER_DLL
};
};
#endif // __PARTITIONER_H__

Просмотреть файл

@ -1,290 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: PhysicalDevice.h
//
// summary: Declares the physical device class
///-------------------------------------------------------------------------------------------------
#pragma once
#include <deque>
#include <vector>
#include <set>
#include "oclhdr.h"
#include "ptdxhdr.h"
#include "cuhdr.h"
#include "accelerator.h"
#include "primitive_types.h"
#include "PhysicalDevice.h"
#include "Lockable.h"
#include <map>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// Forward declarations
///-------------------------------------------------------------------------------------------------
class Task;
class DXAccelerator;
#ifdef CUDA_SUPPORT
class CUAccelerator;
#endif
#ifdef OPENCL_SUPPORT
class CLAccelerator;
#endif
class HostAccelerator;
///-------------------------------------------------------------------------------------------------
/// <summary> DIRECTX_DEVICERECORD: everything we have available to uniquely identify a device
/// through the DXGI API.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
typedef struct dxdevrec_t {
/// <summary> The DXGI adapter interface for the device. </summary>
IDXGIAdapter * pAdapter;
/// <summary> The DXGI adapter description for the device. </summary>
DXGI_ADAPTER_DESC desc;
} DIRECTX_DEVICERECORD;
#ifdef OPENCL_SUPPORT
///-------------------------------------------------------------------------------------------------
/// <summary> OPENCL_DEVICERECORD: everything we have available to uniquely identify a device
/// through OpenCL. Only declared when OPENCL_SUPPORT is defined.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
typedef struct cldevrec_t {
/// <summary> The OpenCL platform id. </summary>
cl_platform_id platform;
/// <summary> The OpenCL device id. </summary>
cl_device_id device;
} OPENCL_DEVICERECORD;
#endif
#ifdef CUDA_SUPPORT
///-------------------------------------------------------------------------------------------------
/// <summary> CUDA_DEVICERECORD: everything we have available to uniquely identify a device
/// through CUDA APIs. Only declared when CUDA_SUPPORT is defined.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
typedef struct cudevrec_t {
/// <summary> The CUDA device handle. </summary>
CUdevice device;
} CUDA_DEVICERECORD;
#endif
///-------------------------------------------------------------------------------------------------
/// <summary> Physical device object mapping a unique physical accelerator such as a GPU card
/// to Accelerator objects that use it through the various back-end runtimes that
/// PTask supports (DirectX, CUDA, OpenCL).
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class PhysicalDevice : public Lockable
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
PhysicalDevice();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~PhysicalDevice(void);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this physical device is busy. We need this at the physical device layer
/// because a physical device may be busy through its CUDA accelerator interface but
/// not through its DirectX interface, for example.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> true if busy, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsBusy();
///-------------------------------------------------------------------------------------------------
/// <summary> Mark this device as busy, meaning it is performing a dispatch for some Task.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="b"> The new busy state: true to mark the device busy. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetBusy(BOOL b);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if the Accelerator 'pAccelerator' is a runtime-specific interface
/// for this physical device.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
///
/// <returns> true if same device, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsSameDevice(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if the DXGI adapter 'pDevice' is the same device as this one. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pDevice"> [in,out] If non-null, the DXGI adapter. </param>
/// <param name="pDesc"> [in,out] If non-null, the adapter description. </param>
/// <param name="nPlatformIndex"> Zero-based platform index. </param>
///
/// <returns> true if same device, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsSameDevice(IDXGIAdapter * pDevice, DXGI_ADAPTER_DESC * pDesc, UINT nPlatformIndex);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if 'platform'/'device' is same device. Only declared when OPENCL_SUPPORT
/// is defined. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="platform"> The OpenCL platform id. </param>
/// <param name="device"> The OpenCL device id. </param>
/// <param name="nPlatformIndex"> Zero-based platform index. </param>
///
/// <returns> true if same device, false if not. </returns>
///-------------------------------------------------------------------------------------------------
#ifdef OPENCL_SUPPORT
virtual BOOL IsSameDevice(cl_platform_id platform, cl_device_id device, UINT nPlatformIndex);
#endif
///-------------------------------------------------------------------------------------------------
/// <summary> Query if 'device' is same device. Only declared when CUDA_SUPPORT is
/// defined. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="device"> The CUDA device handle. </param>
/// <param name="nPlatformIndex"> Zero-based platform index. </param>
///
/// <returns> true if same device, false if not. </returns>
///-------------------------------------------------------------------------------------------------
#ifdef CUDA_SUPPORT
virtual BOOL IsSameDevice(CUdevice device, UINT nPlatformIndex);
#endif
///-------------------------------------------------------------------------------------------------
/// <summary> Adds an Accelerator interface to this physical device record. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL AddInterface(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this physical device has an Accelerator interface that can be used
/// to execute tasks with the given accelerator class.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="cls"> The accelerator class. </param>
///
/// <returns> true if the device has an interface of the given class, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Supports(ACCELERATOR_CLASS cls);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets an accelerator interface on this physical device that can be used to execute
/// tasks of the given accelerator class. Return NULL if no such interface is present.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="cls"> The accelerator class. </param>
///
/// <returns> null if no appropriate interface is available, else the accelerator interface.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual Accelerator * GetAcceleratorInterface(ACCELERATOR_CLASS cls);
protected:
// Note that we track a device entry per supported runtime: this is because the device may have
// support from only a subset of the runtimes (e.g. Tesla cards, which do not enumerate as
// Adapters and therefore enjoy OpenCL and CUDA support, but no DirectX support).
/// <summary> Data that uniquely identify the physical device
/// using DirectX/DXGI APIs. NULL if no DirectX support is available
/// for this physical device.
/// </summary>
DIRECTX_DEVICERECORD * m_pDirectXDevice;
/// <summary> Data that uniquely identify the physical device
/// using OpenCL APIs. NULL if no OpenCL support is available
/// for this physical device.
/// </summary>
#ifdef OPENCL_SUPPORT
OPENCL_DEVICERECORD * m_pOpenCLDevice;
#endif
/// <summary> Data that uniquely identify the physical device
/// using CUDA APIs. NULL if no CUDA support is available
/// for this physical device.
/// </summary>
#ifdef CUDA_SUPPORT
CUDA_DEVICERECORD * m_pCUDADevice;
#endif
/// <summary> The DirectX Accelerator object that maps to this physical
/// device. NULL if no DirectX support is available
/// for this physical device.
/// </summary>
DXAccelerator * m_pDXAccelerator;
/// <summary> The CUDA Accelerator object that maps to this physical
/// device. NULL if no CUDA support is available
/// for this physical device.
/// </summary>
#ifdef CUDA_SUPPORT
CUAccelerator * m_pCUAccelerator;
#endif
/// <summary> The OpenCL Accelerator object that maps to this physical
/// device. NULL if no OpenCL support is available
/// for this physical device.
/// </summary>
#ifdef OPENCL_SUPPORT
CLAccelerator * m_pCLAccelerator;
#endif
/// <summary> The Host Accelerator object that maps to this physical
/// device. Not used.
/// </summary>
HostAccelerator * m_pHostAccelerator;
/// <summary> true if this device is in flight, meaning it is currently
/// being used in the dispatch of a Task.
/// </summary>
BOOL m_bInFlight;
};
};

Просмотреть файл

@ -1,249 +0,0 @@
//--------------------------------------------------------------------------------------
// File: Recorder.h
// Copyright (c) Microsoft Corporation. All rights reserved.
//--------------------------------------------------------------------------------------
#ifndef _RECORDER_H_
#define _RECORDER_H_
#ifndef XMLSUPPORT
//namespace PTask {
// class BindDescriptorPort { public : BindDescriptorPort(void * pDescribedPort, void * pDescriberPort, int func) {} };
// class BindControlPort { public : BindControlPort(void * pDescribedPort, void * pDescriberPort, int func) {} };
// class BindControlPropagationPort { public : BindControlPropagationPort(void * pDescribedPort, void * pDescriberPort) {} };
// class SetPredicationType { public : SetPredicationType(void * pDescribedPort, int pDescriberPort, int func) {} };
// class SetComputeGeometry { public : SetComputeGeometry(void * pDescribedPort, int pDescriberPort, int func, int blah) {} };
// class Recorder { public: static void Record(void * action); };
//}
// XMLSUPPORT is not defined: every recorder macro expands to nothing, so call sites
// compile to no code.
#define INITRECORDER()
#define DESTROYRECORDER()
#define RECORDACTION(x,y,z,w)
#define RECORDACTION2P(x,y,z)
#define RECORDACTION4P(x,y,z,w,t)
#else
// XMLSUPPORT is defined: the recorder macros forward to the static Recorder interface.
//   INITRECORDER / DESTROYRECORDER  call Recorder::Initialize() / Recorder::Destroy().
//   RECORDACTION*                   heap-allocate the PTask action class named by x,
//                                   constructed from the remaining arguments, and hand
//                                   it to Recorder::Record().
// The class name x follows the PTask:: qualifier by plain macro substitution. No ##
// operator is used: pasting "::" onto an identifier does not yield a single valid
// preprocessing token, and conforming preprocessors (GCC, Clang) reject it.
#define INITRECORDER() Recorder::Initialize()
#define DESTROYRECORDER() Recorder::Destroy()
#define RECORDACTION(x,y,z,w) Recorder::Record(new PTask::x((y),(z),(w)))
#define RECORDACTION2P(x,y,z) Recorder::Record(new PTask::x((y),(z)))
#define RECORDACTION4P(x,y,z,w,t) Recorder::Record(new PTask::x((y),(z),(w),(t)))
#include "XMLWriter.h"
#include "XMLReader.h"
#include "port.h"
namespace PTask {
class Graph;
class Task;
// Discriminator stored in RecordedAction::m_type. There is one enumerator per
// RecordedAction subclass declared in this header.
typedef enum _recorded_action_type {
BINDCONTROLPORT,
BINDCONTROLPROPAGATIONCHANNEL,
BINDCONTROLPROPAGATIONPORT,
BINDDESCRIPTORPORT,
BINDITERATIONSCOPE,
SETBLOCKANDGRIDSIZE,
SETCOMPUTEGEOMETRY,
SETPREDICATIONTYPE
} RECORDEDACTIONTYPE;
// Abstract base class for recorded actions. Stores the action's type and name, and
// requires every subclass to implement Write, Read and Replay.
class RecordedAction {
public:
// type: the kind of action; name: the action's name (returned by GetName()).
RecordedAction(RECORDEDACTIONTYPE type, std::string name);
// Serialization hooks taking an XMLWriter / XMLReader; pure virtual.
virtual void Write(XMLWriter * writer)=0;
virtual void Read(XMLReader * reader)=0;
// NOTE(review): presumably re-applies the action to a graph being rebuilt - confirm
// against the implementation.
virtual void Replay(XMLReader * reader)=0;
const char * GetName();
virtual ~RecordedAction() { }
protected:
RECORDEDACTIONTYPE m_type;
std::string m_name;
};
// Recorded action for binding a control port: holds the UIDs of the controller port
// and the gated port, plus the initially-open flag.
class BindControlPort : public RecordedAction {
public:
BindControlPort();
// NOTE(review): presumably captures the UIDs of the two ports - confirm in the
// implementation.
BindControlPort(
Port * pController,
Port * pGatedPort,
BOOL bInitiallyOpen
);
virtual ~BindControlPort() { }
// Implementations of the RecordedAction pure virtuals.
void Write(XMLWriter * writer);
void Read(XMLReader * reader);
void Replay(XMLReader * reader);
protected:
UINT m_controllerPortUID;
UINT m_gatedPortUID;
BOOL m_initiallyOpen;
};
// Recorded action for binding control propagation from an input port to a channel:
// holds the input port's UID and the controlled channel's name.
class BindControlPropagationChannel : public RecordedAction {
public:
BindControlPropagationChannel();
BindControlPropagationChannel(
Port * pInputPort,
Channel * pControlledChannel
);
virtual ~BindControlPropagationChannel() { }
// Implementations of the RecordedAction pure virtuals.
void Write(XMLWriter * writer);
void Read(XMLReader * reader);
void Replay(XMLReader * reader);
protected:
UINT m_inputPortUID;
std::string m_controlledChannelName;
};
// Recorded action for binding control propagation from an input port to an output
// port: holds the UIDs of both ports.
class BindControlPropagationPort : public RecordedAction {
public:
BindControlPropagationPort();
BindControlPropagationPort(
Port * pInputPort,
Port * pOutputPort
);
virtual ~BindControlPropagationPort() { }
// Implementations of the RecordedAction pure virtuals.
void Write(XMLWriter * writer);
void Read(XMLReader * reader);
void Replay(XMLReader * reader);
protected:
UINT m_inputPortUID;
UINT m_outputPortUID;
};
// Recorded action for binding a descriptor port: holds the UIDs of the described and
// describer ports, plus the descriptor function.
class BindDescriptorPort : public RecordedAction {
public:
BindDescriptorPort();
BindDescriptorPort(
Port * pDescribedPort,
Port * pDescriberPort,
DESCRIPTORFUNC func
);
virtual ~BindDescriptorPort() { }
// Implementations of the RecordedAction pure virtuals.
void Write(XMLWriter * writer);
void Read(XMLReader * reader);
void Replay(XMLReader * reader);
protected:
UINT m_describedPortUID;
UINT m_describerPortUID;
DESCRIPTORFUNC m_func;
};
// Recorded action for binding an iteration scope: holds the UIDs of the meta port and
// the scoped port.
class BindIterationScope : public RecordedAction {
public:
BindIterationScope();
BindIterationScope(
Port * pMetaPort,
Port * pScopedPort
);
virtual ~BindIterationScope() { }
// Implementations of the RecordedAction pure virtuals.
void Write(XMLWriter * writer);
void Read(XMLReader * reader);
void Replay(XMLReader * reader);
protected:
UINT m_metaPortUID;
UINT m_scopedPortUID;
};
// Recorded action for setting a task's block and grid size: holds the task's name and
// the grid and block dimensions.
class SetBlockAndGridSize : public RecordedAction {
public:
SetBlockAndGridSize();
// Note the argument order: grid first, then block.
SetBlockAndGridSize(
Task * task,
PTASKDIM3 grid,
PTASKDIM3 block
);
virtual ~SetBlockAndGridSize() { }
// Implementations of the RecordedAction pure virtuals.
void Write(XMLWriter * writer);
void Read(XMLReader * reader);
void Replay(XMLReader * reader);
protected:
std::string m_taskName;
PTASKDIM3 m_grid;
PTASKDIM3 m_block;
};
// Recorded action for setting a task's compute geometry: holds the task's name and
// three integer extents (tgx, tgy, tgz).
// NOTE(review): tgx/tgy/tgz are presumably thread-group counts in x, y and z - confirm.
class SetComputeGeometry : public RecordedAction {
public:
SetComputeGeometry();
SetComputeGeometry(
Task * task,
int tgx,
int tgy,
int tgz);
virtual ~SetComputeGeometry() { }
// Implementations of the RecordedAction pure virtuals.
void Write(XMLWriter * writer);
void Read(XMLReader * reader);
void Replay(XMLReader * reader);
protected:
std::string m_taskName;
int m_tgx;
int m_tgy;
int m_tgz;
};
// Recorded action for setting a channel's predication type: holds the channel's name,
// the endpoint type and the canonical predicate.
class SetPredicationType : public RecordedAction {
public:
SetPredicationType();
SetPredicationType(
Channel * pChannel,
CHANNELENDPOINTTYPE eEndpoint,
CHANNELPREDICATE eCanonicalPredicator
);
virtual ~SetPredicationType() { }
// Implementations of the RecordedAction pure virtuals.
void Write(XMLWriter * writer);
void Read(XMLReader * reader);
void Replay(XMLReader * reader);
protected:
std::string m_channelName;
// The constructor takes enum types, but both values are stored as plain ints.
int m_endpointType;
int m_canonicalPredicate;
};
// Collects RecordedAction objects. Accessed through static entry points and a single
// static instance pointer.
class Recorder {
public:
// HACK: Recorder is a singleton for now.
// TODO: Move to a Recorder per Graph, once we can obtain a handle to the Graph instance
// from all methods which want to record (such as methods on Port and Channel).
// One possible solution is to move all recordable actions to be methods on Graph.
static Recorder * Instance();
// Static entry point used by the RECORDACTION* macros.
// NOTE(review): ownership of 'action' is not visible in this header - confirm whether
// the Recorder deletes recorded actions.
static void Record(RecordedAction * action);
static void Initialize();
static void Destroy();
// Creates an action object from an action name.
// NOTE(review): presumably returns NULL for an unknown name - confirm.
RecordedAction * CreateAction(const char * actionName);
std::vector<RecordedAction *>* GetRecordedActions();
protected:
// Construction, destruction, copy construction and assignment are all protected, so
// outside code cannot create, destroy or copy a Recorder directly.
Recorder();
virtual ~Recorder();
Recorder(Recorder const&);
Recorder& operator=(Recorder const&);
void RecordAction(RecordedAction * action);
std::vector<RecordedAction *> m_vRecordedActions;
static Recorder * s_pInstance;
};
}; // namespace PTask
#endif
#endif

Просмотреть файл

@ -1,91 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: RefCountProfiler.h
//
// summary: Declares the reference count profiler class
///-------------------------------------------------------------------------------------------------
#ifndef __REFERENCE_COUNTED_PROFILER_H__
#define __REFERENCE_COUNTED_PROFILER_H__
#include <Windows.h>
#include <iostream>
#include <sstream>
#include <stdio.h>
#include <crtdbg.h>
#include <set>
#include "primitive_types.h"
namespace PTask {
class ReferenceCounted;
///-------------------------------------------------------------------------------------------------
/// <summary> Profiler class for reference counted objects. All members are static.
/// </summary>
///
/// <remarks> Crossbac, 7/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
class ReferenceCountedProfiler
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the refcount profiler. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="bEnable"> Whether profiling is enabled. NOTE(review): presumably stored
/// in m_nRCProfilerEnable - confirm. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL Initialize(BOOL bEnable);
///-------------------------------------------------------------------------------------------------
/// <summary> Deinitializes the refcount profiler. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///-------------------------------------------------------------------------------------------------
static void Deinitialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Dumps the refcount profiler's leak report to a stream. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="ss"> [in,out] The output stream the report is written to. </param>
///-------------------------------------------------------------------------------------------------
static void Report(std::ostream& ss);
///-------------------------------------------------------------------------------------------------
/// <summary> Profile allocation. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="pItem"> [in,out] If non-null, the item. </param>
///-------------------------------------------------------------------------------------------------
static void RecordAllocation(ReferenceCounted * pItem);
///-------------------------------------------------------------------------------------------------
/// <summary> Profile deletion. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="pItem"> [in,out] If non-null, the item. </param>
///-------------------------------------------------------------------------------------------------
static void RecordDeletion(ReferenceCounted * pItem);
protected:
// Static profiler state. NOTE(review): the roles below are inferred from the member names
// only - confirm against the implementation.
static LONG m_nRCAllocations;       // presumably the number of recorded allocations
static LONG m_nRCDeletions;         // presumably the number of recorded deletions
static LONG m_nRCProfilerInit;      // presumably non-zero once Initialize has run
static LONG m_nRCProfilerEnable;    // presumably non-zero when profiling is enabled
static LONG m_nRCProfilerIDCount;
static CRITICAL_SECTION m_csRCProfiler;
static std::set<PTask::ReferenceCounted*> m_vAllAllocations;
};
};
#endif // __REFERENCE_COUNTED_PROFILER_H__

Просмотреть файл

@ -1,172 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: ReferenceCounted.h
//
// summary: Declares the reference counted class
///-------------------------------------------------------------------------------------------------
#ifndef __REFERENCE_COUNTED_H__
#define __REFERENCE_COUNTED_H__
#include <Windows.h>
#include <iostream>
#include <sstream>
#include <stdio.h>
#include <crtdbg.h>
#include <set>
#include "primitive_types.h"
#include "Lockable.h"
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Reference counted super-class, allowing to share implementation of ref count
/// management code.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class ReferenceCounted : public Lockable
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///-------------------------------------------------------------------------------------------------
ReferenceCounted();
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor taking a name for the protected object. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="lpszProtectedObjectName"> [in] non-null, name of the protected object. </param>
///-------------------------------------------------------------------------------------------------
ReferenceCounted(char * lpszProtectedObjectName);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Virtual, so subclasses can be destroyed through a
/// ReferenceCounted pointer.
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~ReferenceCounted();
///-------------------------------------------------------------------------------------------------
/// <summary> Adds a reference. </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <returns> A LONG; presumably the reference count after the increment.
/// NOTE(review): confirm against the implementation.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual LONG AddRef();
///-------------------------------------------------------------------------------------------------
/// <summary> Release a reference. Virtual: per the note on m_uiRefCount, class Datablock
/// overrides this to return pooled blocks to their pools instead of deleting them.
/// </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <returns> A LONG; presumably the reference count after the decrement.
/// NOTE(review): confirm against the implementation.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual LONG Release();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the reference count. (for debugging only) </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <returns> current reference count for the object. </returns>
///-------------------------------------------------------------------------------------------------
LONG RefCount();
///-------------------------------------------------------------------------------------------------
/// <summary> Stream insertion operator for a pointer to a ReferenceCounted object.
/// Declared as a friend, so it may read non-public members.
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="os"> [in,out] The output stream to write to. </param>
/// <param name="pBlock"> [in,out] If non-null, the reference counted object to write. </param>
///
/// <returns> A reference to an output stream (presumably os, to allow chaining;
/// confirm against the implementation).
/// </returns>
///-------------------------------------------------------------------------------------------------
friend std::ostream& operator<<(std::ostream &os, ReferenceCounted * pBlock);
protected:
/// <summary> Number of outstanding references to this object.
/// When m_uiRefCount drops to zero, it will be
/// garbage collected. NB: Ideally, the refcount would be private. However,
/// class Datablock inherits from ReferenceCounted but has to override Release to return blocks to
/// their block pools rather than deleting them (if they are pooled). Doing this requires the
/// ability to do interlocked operations on the m_uiRefCount member of the super-class. A sad
/// side effect of this is that we are forced to make m_uiRefCount protected rather than private.
/// </summary>
LONG m_uiRefCount;
public:
/// <summary> The unique id of this RC object. </summary>
LONG m_uiUID;
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the refcount profiler. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="bEnable"> Presumably TRUE to enable refcount profiling, FALSE to disable it.
/// NOTE(review): confirm against the implementation.
/// </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL RCProfileInitialize(BOOL bEnable);
///-------------------------------------------------------------------------------------------------
/// <summary> Dumps the refcount profiler leaks. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///-------------------------------------------------------------------------------------------------
static void RCProfileDumpLeaks();
///-------------------------------------------------------------------------------------------------
/// <summary> Profile allocation. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="pItem"> [in,out] If non-null, the item. </param>
///-------------------------------------------------------------------------------------------------
static void RCProfileAllocation(ReferenceCounted * pItem);
///-------------------------------------------------------------------------------------------------
/// <summary> Profile deletion. </summary>
///
/// <remarks> Crossbac, 2/24/2012. </remarks>
///
/// <param name="pItem"> [in,out] If non-null, the item. </param>
///-------------------------------------------------------------------------------------------------
static void RCProfileDeletion(ReferenceCounted * pItem);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a string describing this refcount object. Allows subclasses to
/// provide overrides that make leaks easier to find when detected by the
/// rc profiler.
/// </summary>
///
/// <remarks> Crossbac, 7/9/2013. </remarks>
///
/// <returns> A std::string (returned by value) describing this object. </returns>
///-------------------------------------------------------------------------------------------------
virtual std::string GetRCProfileDescriptor();
};
};
#endif // __REFERENCE_COUNTED_H__

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,249 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: ScopedPoolManager.h
//
// summary: Declares the scoped pool manager class
///-------------------------------------------------------------------------------------------------
#ifndef __SCOPED_POOL_MANAGER__
#define __SCOPED_POOL_MANAGER__
#include <stdio.h>
#include <crtdbg.h>
#include <Windows.h>
#include "datablock.h"
#include "GlobalBlockPool.h"
#include "ptlock.h"
#include <deque>
#include <map>
#include <tuple>
namespace PTask {
class CompiledKernel;
class Graph;
class Channel;
class Port;
class Task;
class Datablock;
class DatablockTemplate;
class ScopedPoolManager : public Lockable {
/// <summary> Descriptor for a required pool: a template pointer plus four ints.
/// NOTE(review): the int fields presumably mirror RequireBlockPool's
/// (nDataSize, nMetaSize, nTemplateSize, nBlocks) -- confirm against the implementation.
/// </summary>
typedef std::tuple<DatablockTemplate*, int, int, int, int> POOLDESCRIPTOR;
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 3/27/2014. </remarks>
///
/// <param name="pScopedGraph"> [in] The graph this manager is created for (presumably the
/// graph whose scope the pools belong to; confirm against the
/// implementation). </param>
///-------------------------------------------------------------------------------------------------
ScopedPoolManager(Graph * pScopedGraph);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 3/27/2014. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~ScopedPoolManager();
///-------------------------------------------------------------------------------------------------
/// <summary> Require block pool. Overload taking both a template and explicit sizes. </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
/// <param name="nDataSize"> Size of the data. </param>
/// <param name="nMetaSize"> Size of the meta. </param>
/// <param name="nTemplateSize"> Size of the template. </param>
/// <param name="nBlocks"> (Optional) The blocks. Defaults to 0. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
RequireBlockPool(
__in DatablockTemplate * pTemplate,
__in int nDataSize,
__in int nMetaSize,
__in int nTemplateSize,
__in int nBlocks=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Require block pool. Overload taking explicit sizes only (no template). </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <param name="nDataSize"> Size of the data. </param>
/// <param name="nMetaSize"> Size of the meta. </param>
/// <param name="nTemplateSize"> Size of the template. </param>
/// <param name="nBlocks"> (Optional) The blocks. Defaults to 0. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
RequireBlockPool(
__in int nDataSize,
__in int nMetaSize,
__in int nTemplateSize,
__in int nBlocks=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Require block pool. Overload taking a template only (no explicit sizes). </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
/// <param name="nBlocks"> (Optional) The blocks. Defaults to 0. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
RequireBlockPool(
__in DatablockTemplate * pTemplate,
__in int nBlocks=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Find a block pool for the block. If there is no good fit,
/// create one if the bCreateIfNotFound flag is set.
/// </summary>
///
/// <remarks> crossbac, 8/30/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
/// <param name="bCreateIfNotFound"> TRUE to create a pool when no good fit is found. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
AddBlockToBestFitPool(
__in Datablock * pBlock,
__in BOOL bCreateIfNotFound
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate pools.
/// NOTE(review): the original summary read "Determines if we can allocate pools",
/// which does not match the name; presumably this allocates the pools
/// registered through RequireBlockPool -- confirm against the implementation.
/// </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL AllocatePools();
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys the pools. </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL DestroyPools();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate datablock. </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
/// <param name="uiDataSize"> Size of the data. </param>
/// <param name="uiMetaSize"> Size of the meta. </param>
/// <param name="uiTemplateSize"> Size of the template. </param>
///
/// <returns> null if it fails, else the allocated datablock. </returns>
///-------------------------------------------------------------------------------------------------
Datablock *
AllocateDatablock(
__in DatablockTemplate * pTemplate,
__in UINT uiDataSize,
__in UINT uiMetaSize,
__in UINT uiTemplateSize
);
///-------------------------------------------------------------------------------------------------
/// <summary> Request a pooled block. </summary>
///
/// <remarks> crossbac, 8/21/2013. </remarks>
///
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
/// <param name="uiDataSize"> Size of the data. </param>
/// <param name="uiMetaSize"> Size of the meta. </param>
/// <param name="uiTemplateSize"> Size of the template. </param>
///
/// <returns> null if it fails, else the requested datablock. </returns>
///-------------------------------------------------------------------------------------------------
Datablock *
RequestBlock(
__in DatablockTemplate * pTemplate,
__in UINT uiDataSize,
__in UINT uiMetaSize,
__in UINT uiTemplateSize
);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Searches for the first matching pool. </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
/// <param name="uiDataSize"> Size of the data. </param>
/// <param name="uiMetaSize"> Size of the meta. </param>
/// <param name="uiTemplateSize"> Size of the template. </param>
///
/// <returns> null if it fails, else the found matching pool. </returns>
///-------------------------------------------------------------------------------------------------
GlobalBlockPool *
FindMatchingPool(
__in DatablockTemplate * pTemplate,
__in UINT uiDataSize,
__in UINT uiMetaSize,
__in UINT uiTemplateSize
);
///-------------------------------------------------------------------------------------------------
/// <summary> Find a block pool for the block. If there is no good fit,
/// create one if the bCreateIfNotFound flag is set. Protected counterpart
/// of AddBlockToBestFitPool with the same parameters.
/// NOTE(review): how the two differ (e.g. locking) is not visible in this
/// header -- confirm against the implementation.
/// </summary>
///
/// <remarks> crossbac, 8/30/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
/// <param name="bCreateIfNotFound"> TRUE to create a pool when no good fit is found. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
__AddBlockToBestFitPool(
__in Datablock * pBlock,
__in BOOL bCreateIfNotFound
);
/// <summary> A graph pointer (presumably the one passed to the constructor; confirm). </summary>
Graph * m_pGraph;
/// <summary> Flag; presumably TRUE once AllocatePools has run (confirm). </summary>
BOOL m_bPoolsAllocated;
/// <summary> Flag; presumably TRUE once DestroyPools has run (confirm). </summary>
BOOL m_bDestroyed;
/// <summary> Pool descriptors keyed by int.
/// NOTE(review): the meaning of the int key is not visible in this header. </summary>
std::map<int, POOLDESCRIPTOR> m_vRequiredPoolsUntyped;
/// <summary> Pool descriptors keyed by datablock template pointer. </summary>
std::map<DatablockTemplate*, POOLDESCRIPTOR> m_vRequiredPoolsTyped;
/// <summary> Block pools keyed by int.
/// NOTE(review): the meaning of the int key is not visible in this header. </summary>
std::map<int, GlobalBlockPool*> m_vUntypedBlockPools;
/// <summary> Block pools keyed by datablock template pointer. </summary>
std::map<DatablockTemplate*, GlobalBlockPool*> m_vTypedBlockPools;
};
};
#endif

Просмотреть файл

@ -1,377 +0,0 @@
//--------------------------------------------------------------------------------------
// File: StickyPort.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _STICKY_PORT_H_
#define _STICKY_PORT_H_
#include "primitive_types.h"
#include "port.h"
#include <vector>
#include <sstream>
namespace PTask {
class Channel;
class Datablock;
class Accelerator;
class DatablockTemplate;
///-------------------------------------------------------------------------------------------------
/// <summary> Sticky port. A port that is bound to scalar values in kernel code, with read-only
/// semantics. Typically these values are bound to constant memory on a device where
/// specialized memories are available. A sticky port also retains its last value: if
/// no new datablock is available on its incoming channel it will redeliver the last
/// datablock pulled from it on the next call to Pull.
/// </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
class StickyPort : public Port {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
StickyPort();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~StickyPort();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object is occupied. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <returns> true if occupied, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsOccupied();
///-------------------------------------------------------------------------------------------------
/// <summary> Pulls the next datablock from this port. Return the last datablock if no new
/// block is available on the incoming channel.
/// </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <returns> null if it fails, else the pulled datablock. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Pull();
///-------------------------------------------------------------------------------------------------
/// <summary> Returns the datablock that would be returned by the next call to Pull, without
/// removing it from the port.
/// </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <returns> null if it fails, else the current top-of-stack object. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Peek();
///-------------------------------------------------------------------------------------------------
/// <summary> The push method is required by the abstract class Port, but has no meaning for
/// sticky ports. This method is a no-op for StickyPort.
/// </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <param name="p"> [in,out] If non-null, the Datablock* to push. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Push(Datablock* p);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the destination datablock for this port. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator.
/// Defaults to NULL. </param>
///
/// <returns> null if it fails, else the destination buffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * GetDestinationBuffer(Accelerator * pAccelerator=NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a destination buffer. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <param name=""> [in,out] If non-null, the destination datablock (the parameter is
/// unnamed in this declaration). </param>
///-------------------------------------------------------------------------------------------------
virtual void SetDestinationBuffer(Datablock *);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a block to be the permanently sticky block for this port. Obviously, only
/// valid for certain kinds of ports (input varieties). Use for blocks that will have
/// only one value for the lifetime of the graph, to avoid creating and managing an
/// exposed channel or initializer channel that will only ever be used once. Do not
/// connect an upstream channel to ports that have been configured with a permanent
/// block.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="p"> If non-null, the Datablock* to make permanent. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetPermanentBlock(Datablock * p);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object has block pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if block pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasBlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
/// we have an upstream allocator (meta) port, the runtime will not create a block
/// pool for the corresponding output port. This turns out to put device-side
/// allocation on the critical path in some cases, so we provide a way to override
/// that behavior and allow a port to create a pool based on some size hints. When
/// there is a block available with sufficient space in the pool, the meta port can
/// avoid the allocation and draw from the pool.
/// </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///
/// <param name="nPoolSize"> Size of the block pool. </param>
/// <param name="nStride"> The stride. </param>
/// <param name="nDataBytes"> The data in bytes. </param>
/// <param name="nMetaBytes"> The meta in bytes. </param>
/// <param name="nTemplateBytes"> The template in bytes. </param>
/// <param name="bPageLockHostViews"> (optional) the page lock host views. Defaults to FALSE. </param>
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. Defaults to FALSE. </param>
///-------------------------------------------------------------------------------------------------
virtual void
ForceBlockPoolHint(
__in UINT nPoolSize,
__in UINT nStride,
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in BOOL bPageLockHostViews=FALSE,
__in BOOL bEagerDeviceMaterialize=FALSE
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
///
/// Allocation of data-blocks and platform-specific buffers can be a significant
/// latency expense at dispatch time. We can actually preallocate output datablocks
/// and create device- side buffers at graph construction time. For each node in the
/// graph, allocate data blocks on any output ports, and create device-specific
/// buffers for all accelerators capable of executing the node.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPool(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
/// to null, and then release it. </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void
DestroyBlockPool(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
///
/// <remarks> crossbac, 6/18/2013. </remarks>
///
/// <returns> true if a block pool is active, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
IsBlockPoolActive(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles only the first pass.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPoolAsync(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles the second pass. It takes no parameters.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
FinalizeBlockPoolAsync(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> add a new block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddNewBlock(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> return a block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void ReturnToPool(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets pool size. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The pool size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets request page locked. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetRequestsPageLocked(BOOL bPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets request page locked. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> A BOOL; presumably the current page-locked request setting
/// (confirm against the implementation).
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL GetRequestsPageLocked();
///-------------------------------------------------------------------------------------------------
/// <summary> Static factory: creates a port object. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
/// <param name="uiId"> The identifier. </param>
/// <param name="lpszVariableBinding"> [in,out] If non-null, the variable binding. </param>
/// <param name="nParmIdx"> Zero-based index of the n parm. </param>
/// <param name="nInOutRouteIdx"> Zero-based index of the n in out route. </param>
///
/// <returns> null if it fails, else the created port. </returns>
///-------------------------------------------------------------------------------------------------
static Port * Create(DatablockTemplate * pTemplate,
UINT uiId,
char * lpszVariableBinding,
int nParmIdx,
int nInOutRouteIdx
);
///-------------------------------------------------------------------------------------------------
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
/// for this port in a way that is consistent with a well-formed graph. Called by
/// CheckSemantics()
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="pos"> [in,out] output string stream. </param>
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
PTask::Graph * pGraph);
protected:
/// <summary> The sticky datablock </summary>
Datablock * m_pStickyDatablock;
};
#endif

Просмотреть файл

@ -1,230 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: SyncPoint.h
//
// summary: Declares the synchronise point class
///-------------------------------------------------------------------------------------------------
#ifndef __SYNC_POINT_H__
#define __SYNC_POINT_H__
#include "primitive_types.h"
#include "ReferenceCounted.h"
namespace PTask {
class AsyncContext;
///-------------------------------------------------------------------------------------------------
/// <summary> A synchronization point, on which dependences may be created, so that other
/// threads/downstream operations can wait until dependences on previous operations
/// in this context have resolved.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///-------------------------------------------------------------------------------------------------
class SyncPoint : public ReferenceCounted {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> crossbac, 5/25/2012. </remarks>
///
/// <param name="_pAsyncContext"> [in] If non-null, the asynchronous context whose
/// operations this sync point tracks. </param>
/// <param name="_pPlatformAsyncContextObject"> [in] non-null, the platform-specific asynchronous
/// context object. E.g. the stream in CUDA, the
/// ID3D11ImmediateContext object in DirectX and so
/// on. </param>
/// <param name="_pPlatformAsyncWaitObject"> [in] non-null, a platform-specific asynchronous
/// wait object. E.g. a windows event or a cuda event
/// object, etc. </param>
/// <param name="_pPlatformParentSyncObject"> [in] The platform parent synchronisation object
/// (not used by all platforms). </param>
///-------------------------------------------------------------------------------------------------
SyncPoint(
__in AsyncContext * _pAsyncContext,
__in void * _pPlatformAsyncContextObject,
__in void * _pPlatformAsyncWaitObject,
__in void * _pPlatformParentSyncObject
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~SyncPoint();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the platform context object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> null if it fails, else the platform context object. </returns>
///-------------------------------------------------------------------------------------------------
void * GetPlatformContextObject();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the platform wait object. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> null if it fails, else the platform wait object. </returns>
///-------------------------------------------------------------------------------------------------
void * GetPlatformWaitObject();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the platform parent synchronisation object (not used by all platforms).
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012.
/// NOTE(review): presumably returns m_pPlatformParentSyncObject; the definition
/// is not visible from this header. </remarks>
///
/// <returns> null if it fails, else the platform parent object. </returns>
///-------------------------------------------------------------------------------------------------
void * GetPlatformParentObject();
///-------------------------------------------------------------------------------------------------
/// <summary> Lock-free check of the outstanding flag.
/// A FALSE result is definitive: the sync point represents completed work, and
/// no lock is required to check this since the transition is monotonic.
/// A TRUE result is conservative: it does not mean the sync point has not
/// resolved, only that the caller should acquire locks and call
/// QueryOutstanding to get a higher fidelity answer.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true if (conservatively) outstanding, false if definitely not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
QueryOutstandingFlag(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this sync point represents outstanding work or work that has been
/// completed.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
///
/// <returns> true if outstanding, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
QueryOutstanding(
__in AsyncContext * pAsyncContext
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this sync point represents outstanding work or work that has been
/// completed without blocking to acquire the locks needed to update async context
/// and accelerator state when a state change on this sync point is detected.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true if outstanding, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
NonblockingQueryOutstanding(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Marks this sync point as retired, meaning all the ops preceding it
/// are known to be complete. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="bContextSynchronized"> TRUE if the sync point is being retired because the
/// context was synchronized (cf. m_bContextSynchronized;
/// presumably stored there -- confirm). </param>
/// <param name="bStatusQueried"> TRUE if the sync point is being retired because the
/// underlying event was queried (cf. m_bStatusQueried;
/// presumably stored there -- confirm). </param>
///-------------------------------------------------------------------------------------------------
void
MarkRetired(
__in BOOL bContextSynchronized,
__in BOOL bStatusQueried
);
/////-------------------------------------------------------------------------------------------------
///// NOTE: the Release override below is intentionally commented out (disabled).
///// <summary> Release by decrementing the refcount. We override the implementation inherited
///// from ReferenceCounted so that we can figure out if the outstanding list
///// for the containing async context can be garbage collected. If the refcount
///// goes from 2 to 1, that *should* mean that its async context holds the only
///// reference, and therefore we can retire it.
///// </summary>
/////
///// <remarks> Crossbac, 12/19/2011. </remarks>
/////
///// <returns> . </returns>
/////-------------------------------------------------------------------------------------------------
//virtual LONG Release();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets asynchronous context. </summary>
///
/// <remarks> crossbac, 5/1/2013. </remarks>
///
/// <returns> null if it fails, else the asynchronous context. </returns>
///-------------------------------------------------------------------------------------------------
AsyncContext * GetAsyncContext();
protected:
/// <summary> The platform-specific asynchronous context object.
/// Maps loosely to the abstraction of an independent command
/// queue for a given device context.
/// </summary>
void * m_pPlatformAsyncContextObject;
/// <summary> The platform-specific asynchronous wait object. </summary>
void * m_pPlatformAsyncWaitObject;
/// <summary> The platform parent synchronisation object--not used by all platforms. </summary>
void * m_pPlatformParentSyncObject;
/// <summary> Context for the outstanding asynchronous operations. </summary>
AsyncContext * m_pAsyncContext;
/// <summary> true if ops preceding this sync-point are known to
/// be outstanding (or rather, conservatively, not known
/// to be complete). </summary>
BOOL m_bOutstanding;
/// <summary> true if we queried the underlying event to figure out
/// that the sync point was no longer outstanding. </summary>
BOOL m_bStatusQueried;
/// <summary> true if the context was synchronized, causing the
/// sync point to be no longer outstanding.
/// </summary>
BOOL m_bContextSynchronized;
// AsyncContext and AsyncDependence are granted access to the protected state above.
friend class AsyncContext;
friend class AsyncDependence;
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a string describing this refcount object. Allows subclasses to
/// provide overrides that make leaks easier to find when detected by the
/// rc profiler.
/// </summary>
///
/// <remarks> Crossbac, 7/9/2013. </remarks>
///
/// <returns> The refcount (rc) profile descriptor string. </returns>
///-------------------------------------------------------------------------------------------------
virtual std::string GetRCProfileDescriptor();
};
};
#endif

Просмотреть файл

@ -1,347 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: ThreadPool.h
//
// summary: Declares the thread pool class
///-------------------------------------------------------------------------------------------------
#ifndef __THREAD_POOL_H__
#define __THREAD_POOL_H__
#include <stdio.h>
#include <crtdbg.h>
#include <Windows.h>
#include <deque>
#include <map>
#include <set>
#include "Lockable.h"
#include "PTaskRuntime.h"
namespace PTask {
class ThreadPool;
///-------------------------------------------------------------------------------------------------
/// <summary>   Per-thread descriptor used by ThreadPool. Bundles the thread handle, the
///             events used to start/terminate it, the routine and parameter it should run,
///             and a critical section guarding the descriptor's state.
///
///             The descriptor embeds a CRITICAL_SECTION, which must not be copied or moved
///             once initialized, and the destructor deletes it; copying is therefore
///             disabled so that a descriptor can never be duplicated (which would lead to
///             a copied critical section and a double DeleteCriticalSection).
/// </summary>
///-------------------------------------------------------------------------------------------------
class THREADDESC {
public:
    /// <summary> Guards this descriptor; see Lock()/Unlock(). </summary>
    CRITICAL_SECTION        lock;
    /// <summary> Handle of the thread; INVALID_HANDLE_VALUE until assigned by the pool. </summary>
    HANDLE                  hThread;
    /// <summary> Auto-reset, initially non-signaled event created per descriptor. </summary>
    HANDLE                  hStartEvent;
    /// <summary> Runtime-wide terminate event obtained from the PTask runtime
    ///           (not created by this descriptor). </summary>
    HANDLE                  hTerminateEvent;
    /// <summary> Initially FALSE; presumably TRUE once lpRoutine/lpParameter are valid. </summary>
    BOOL                    bRoutineValid;
    /// <summary> Initially FALSE; presumably set to request thread termination. </summary>
    BOOL                    bTerminate;
    /// <summary> Initially FALSE; presumably TRUE while the thread is running a routine. </summary>
    BOOL                    bActive;
    /// <summary> The routine to run on the thread; NULL until assigned. </summary>
    LPTHREAD_START_ROUTINE  lpRoutine;
    /// <summary> The parameter to pass to lpRoutine; NULL until assigned. </summary>
    LPVOID                  lpParameter;
    /// <summary> Initially FALSE. NOTE(review): semantics inferred from the name only. </summary>
    BOOL                    bDeleteOnThreadExit;
    /// <summary> Initially FALSE. NOTE(review): semantics inferred from the name only. </summary>
    BOOL                    bRemoveFromPoolOnThreadExit;
    /// <summary> The pool this descriptor was created for. </summary>
    ThreadPool *            pThreadPool;

    ///---------------------------------------------------------------------------------------------
    /// <summary>   Constructor. Initializes the critical section, creates the start event and
    ///             puts every other field into its idle state. </summary>
    ///
    /// <param name="pPool">    [in] The owning thread pool. </param>
    ///---------------------------------------------------------------------------------------------
    THREADDESC(ThreadPool*pPool) {
        InitializeCriticalSection(&lock);
        hThread = INVALID_HANDLE_VALUE;
        // unnamed, auto-reset, initially non-signaled
        hStartEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
        hTerminateEvent = PTask::Runtime::GetRuntimeTerminateEvent();
        bRoutineValid = FALSE;
        bTerminate = FALSE;
        bActive = FALSE;
        lpRoutine = NULL;
        lpParameter = NULL;
        bDeleteOnThreadExit = FALSE;
        bRemoveFromPoolOnThreadExit = FALSE;
        pThreadPool = pPool;
    }

    ///---------------------------------------------------------------------------------------------
    /// <summary>   Destructor. Releases the critical section. </summary>
    ///
    /// <remarks>   NOTE(review): hStartEvent is created in the constructor but is not closed
    ///             here; confirm that the owner (ThreadPool) closes it, otherwise the handle
    ///             leaks. </remarks>
    ///---------------------------------------------------------------------------------------------
    ~THREADDESC() {
        DeleteCriticalSection(&lock);
    }

    // Non-copyable: an initialized CRITICAL_SECTION must not be copied, and a copy would
    // also cause DeleteCriticalSection to run twice on the same underlying state.
    THREADDESC(const THREADDESC&) = delete;
    THREADDESC& operator=(const THREADDESC&) = delete;

    /// <summary> Acquires this descriptor's critical section. </summary>
    void Lock() { EnterCriticalSection(&lock); }
    /// <summary> Releases this descriptor's critical section. </summary>
    void Unlock() { LeaveCriticalSection(&lock); }
};
class ThreadPool : public Lockable {
/// <summary> Default value for the uiGrowIncrement parameter of Create. </summary>
static const int DEFGROWINC=2;
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. Declared before the first access specifier, so it is private;
/// NOTE(review): instances are presumably obtained through the static Create
/// method -- confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="nThreads"> The number of threads. </param>
/// <param name="bPrimeThreads"> The prime threads. </param>
/// <param name="bGrowable"> The growable. </param>
/// <param name="uiGrowIncrement"> The grow increment. </param>
///-------------------------------------------------------------------------------------------------
ThreadPool(
__in UINT nThreads,
__in BOOL bPrimeThreads,
__in BOOL bGrowable,
__in UINT uiGrowIncrement
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor (private, like the constructor; see the static Destroy). </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~ThreadPool();
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Creates this object. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <param name="uiThreads"> The threads. </param>
/// <param name="bPrimeThreads"> The prime threads. </param>
/// <param name="bGrowable"> The growable. </param>
/// <param name="uiGrowIncrement"> (optional) The grow increment; defaults to DEFGROWINC. </param>
///
/// <returns> null if it fails, else the thread pool. </returns>
///-------------------------------------------------------------------------------------------------
static ThreadPool *
Create(
__in UINT uiThreads,
__in BOOL bPrimeThreads,
__in BOOL bGrowable,
__in UINT uiGrowIncrement=DEFGROWINC
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys this object. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void Destroy();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets pool size. </summary>
///
/// <remarks> crossbac, 4/29/2013.
/// NOTE(review): how this differs from GetCurrentPoolSize is not visible
/// from this header. </remarks>
///
/// <returns> The pool size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets current pool size. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The current pool size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetCurrentPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets target pool size. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <returns> The target pool size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetTargetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets pool size. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <param name="uiThreads"> The threads. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetPoolSize(UINT uiThreads);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets grow increment. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <returns> The grow increment. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetGrowIncrement();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets grow increment. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <param name="uiIncrement"> Amount to increment by. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetGrowIncrement(UINT uiIncrement);
///-------------------------------------------------------------------------------------------------
/// <summary> Request thread. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <param name="lpRoutine"> The routine. </param>
/// <param name="lpParameter"> The parameter. </param>
/// <param name="bStartThread"> true if the thread can be signaled to start
/// before returning from this call, false if the
/// caller would prefer to signal it explicitly. </param>
///
/// <returns> The handle of the thread. </returns>
///-------------------------------------------------------------------------------------------------
static HANDLE
RequestThread(
__in LPTHREAD_START_ROUTINE lpRoutine,
__in LPVOID lpParameter,
__in BOOL bStartThread
);
///-------------------------------------------------------------------------------------------------
/// <summary> Starts a thread: if a previous call to RequestThread was made with
/// the bStartThread parameter set to false, this API signals the thread
/// to begin. Otherwise, the call has no effect (returns FALSE). </summary>
///
/// <remarks> crossbac, 8/29/2013. </remarks>
///
/// <param name="hThread"> The thread. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
StartThread(
__in HANDLE hThread
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a thread. Non-static member with the same parameters as the static
/// RequestThread. </summary>
///
/// <remarks> crossbac, 8/22/2013.
/// NOTE(review): presumably RequestThread forwards to this on the global
/// pool instance -- confirm against the implementation. </remarks>
///
/// <param name="lpRoutine"> The routine. </param>
/// <param name="lpParameter"> The parameter. </param>
/// <param name="bStartThread"> See RequestThread. </param>
///
/// <returns> The thread. </returns>
///-------------------------------------------------------------------------------------------------
HANDLE
GetThread(
__in LPTHREAD_START_ROUTINE lpRoutine,
__in LPVOID lpParameter,
__in BOOL bStartThread
);
///-------------------------------------------------------------------------------------------------
/// <summary> Signals a thread: non-static member with the same parameter as the static
/// StartThread. If a previous call to RequestThread was made with
/// the bStartThread parameter set to false, this API signals the thread
/// to begin. Otherwise, the call has no effect (returns FALSE). </summary>
///
/// <remarks> crossbac, 8/29/2013. </remarks>
///
/// <param name="hThread"> The thread. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
SignalThread(
__in HANDLE hThread
);
///-------------------------------------------------------------------------------------------------
/// <summary> Thread pool proc (static entry point). </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pDesc"> The thread descriptor, passed as LPVOID
/// (presumably a THREADDESC* -- confirm). </param>
///
/// <returns> DWORD: 0 on thread exit. </returns>
///-------------------------------------------------------------------------------------------------
static DWORD WINAPI _ThreadPoolProc(LPVOID pDesc);
///-------------------------------------------------------------------------------------------------
/// <summary> Thread pool proc. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <param name="pDesc"> The thread descriptor. </param>
///
/// <returns> . </returns>
///-------------------------------------------------------------------------------------------------
DWORD
ThreadPoolProc(
__in THREADDESC * pDesc
);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Notifies a thread alive. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <param name="hThread"> Handle of the thread. </param>
///-------------------------------------------------------------------------------------------------
void NotifyThreadAlive(HANDLE hThread);
///-------------------------------------------------------------------------------------------------
/// <summary> Notifies a thread exit. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <param name="hThread"> Handle of the thread. </param>
///-------------------------------------------------------------------------------------------------
void NotifyThreadExit(HANDLE hThread);
///-------------------------------------------------------------------------------------------------
/// <summary> Wait threads alive. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void WaitThreadsAlive();
///-------------------------------------------------------------------------------------------------
/// <summary> Starts the threads. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///
/// <param name="uiThreads"> The number of threads. </param>
/// <param name="bWaitAlive"> NOTE(review): presumably TRUE to wait until the started
/// threads are alive (cf. WaitThreadsAlive) -- confirm. </param>
///-------------------------------------------------------------------------------------------------
void StartThreads(UINT uiThreads, BOOL bWaitAlive);
///-------------------------------------------------------------------------------------------------
/// <summary> Prime thread. </summary>
///
/// <remarks> crossbac, 8/22/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void PrimeThread();
// Bookkeeping state. NOTE(review): the descriptions below are inferred from member names
// and types only; confirm against the implementation.
/// <summary> Thread handle -> descriptor map. </summary>
std::map<HANDLE, THREADDESC*> m_vhThreadDescs;
/// <summary> Handles of threads presumably available for new requests. </summary>
std::deque<HANDLE> m_vhAvailable;
/// <summary> Handles of threads presumably currently handed out. </summary>
std::set<HANDLE> m_vhInFlight;
/// <summary> Handles of threads presumably requested with bStartThread == FALSE and
/// not yet signaled. </summary>
std::set<HANDLE> m_vhWaitingStartSignal;
/// <summary> Descriptors presumably awaiting cleanup. </summary>
std::set<THREADDESC*> m_vZombieThreadDescs;
UINT m_uiThreads;
UINT m_uiTargetSize;
BOOL m_bPrimeThreads;
BOOL m_bGrowable;
UINT m_uiGrowIncrement;
UINT m_uiThreadsAlive;
HANDLE m_hAllThreadsAlive;
HANDLE m_hAllThreadsExited;
UINT m_uiAliveWaiters;
UINT m_uiExitWaiters;
BOOL m_bExiting;
/// <summary> Static pool pointer (presumably the instance managed by Create/Destroy). </summary>
static ThreadPool * g_pThreadPool;
};
};
#endif

Просмотреть файл

@ -1,46 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: Tracer.h
//
// summary: Declares the tracer class
///-------------------------------------------------------------------------------------------------
#ifndef __PTASK_TRACER_H__
#define __PTASK_TRACER_H__
#include <Windows.h>
#include <iostream>
#include <sstream>
#include <wmistr.h>
#include <evntrace.h>
#include "PTaskRuntime.h"
namespace PTask {
namespace Runtime {
// Dynamically linked etw logging function.
// Signature of that function: takes a HANDLE, an LPVOID and a ULONG, returns a ULONG.
typedef ULONG (WINAPI *LPETWSETMARK)( HANDLE, LPVOID, ULONG );
// Suppress the PREfast "encode global function pointer" warning for the declaration below.
#pragma prefast( suppress:__WARNING_ENCODE_GLOBAL_FUNCTION_POINTER, "This call needs to be performant" );
// Pointer to the dynamically resolved function; NULL until resolved.
// Note: 'static' at namespace scope in a header gives this variable internal linkage, so
// every translation unit that includes this header gets its own copy of the pointer.
static LPETWSETMARK gs_pEtwSetMark = NULL ;
// Size, in CHARs, of the Mark buffer in ETW_SET_MARK_INFORMATION.
#define TRACER_MAX_MSG_LEN 64
// Record consisting of a flag word followed by a fixed-size character buffer.
// NOTE(review): presumably the payload handed to the function behind gs_pEtwSetMark -- confirm.
typedef struct _ETW_SET_MARK_INFORMATION {
ULONG Flag;
CHAR Mark[TRACER_MAX_MSG_LEN];
} ETW_SET_MARK_INFORMATION;
///-------------------------------------------------------------------------------------------------
/// <summary>   Tracing helper exposing static methods for emitting ETW marks and for logging
///             dispatch and buffer-sync events.
///             NOTE(review): behaviour of the individual methods is defined in the
///             implementation file and is not visible from this header. </summary>
///-------------------------------------------------------------------------------------------------
class Tracer
{
public:
    Tracer(void);
    virtual ~Tracer(void);

    /// <summary> Emits an ETW mark carrying the given message.
    ///           NOTE(review): ETW_SET_MARK_INFORMATION::Mark holds TRACER_MAX_MSG_LEN chars;
    ///           presumably longer messages are truncated -- confirm. </summary>
    /// <param name="msg"> The message. </param>
    static VOID EtwSetMarkA(char *msg);

    /// <summary> Logs a dispatch event. </summary>
    /// <param name="lpszTaskName">     Name of the task. </param>
    /// <param name="bStart">           Presumably TRUE for the start of the event, FALSE for
    ///                                 its end -- confirm. </param>
    /// <param name="uiAcceleratorId">  Identifier of the accelerator. </param>
    /// <param name="uiDispatchNumber"> The dispatch number. </param>
    /// <returns> A ULONG status (meaning not visible from this header). </returns>
    static ULONG LogDispatchEvent(char * lpszTaskName, BOOL bStart, UINT uiAcceleratorId, UINT uiDispatchNumber);

    /// <summary> Logs a buffer-sync event. </summary>
    /// <param name="pbufferInstance">  The buffer instance. </param>
    /// <param name="bStart">           Presumably TRUE for the start of the event, FALSE for
    ///                                 its end -- confirm. </param>
    /// <param name="parentDatablock">  The parent datablock. </param>
    /// <param name="uiAcceleratorId">  Identifier of the accelerator. </param>
    /// <returns> A ULONG status (meaning not visible from this header). </returns>
    static ULONG LogBufferSyncEvent(void * pbufferInstance, BOOL bStart, void * parentDatablock, UINT uiAcceleratorId);

private:
    /// <summary> Initializes ETW support (presumably resolves gs_pEtwSetMark -- confirm). </summary>
    // Declared without the redundant 'Tracer::' qualifier: an extra qualification on an
    // in-class member declaration is ill-formed in standard C++ (accepted only as an MSVC
    // extension) and is rejected by GCC and Clang.
    static VOID InitializeETW();
};
};
};
#endif

Просмотреть файл

@ -1,138 +0,0 @@
//--------------------------------------------------------------------------------------
// File: XMLReadr.h
// Copyright (c) Microsoft Corporation. All rights reserved.
//--------------------------------------------------------------------------------------
#ifndef _XMLREADER_H_
#define _XMLREADER_H_
#ifdef XMLSUPPORT
#ifdef _DEBUG
#define _CRTDBG_MAP_ALLOC
#include <stdlib.h>
#include <crtdbg.h>
#endif
#include <vector>
#include <map>
using namespace std;
#include <ole2.h>
#include <xmllite.h>
#include <stdio.h>
#include <shlwapi.h>
/*#include "graph.h"
#include "datablock.h"
#include "datablocktemplate.h"
#include "CompiledKernel.h"
#include "primitive_types.h"
*/
#include "primitive_types.h"
#include "PTaskRuntime.h"
#include "channel.h"
namespace PTask {
/// <summary> Exception type associated with XMLReader; adds nothing to std::exception.
/// NOTE(review): presumably thrown by XMLReader on read/parse failures -- confirm. </summary>
class XMLReaderException: public std::exception {};
class XMLReader
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> jcurrey, 5/8/2013. </remarks>
///
/// <param name="filename"> The name of the file to read XML from. </param>
///-------------------------------------------------------------------------------------------------
XMLReader(const char * filename);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> jcurrey, 5/8/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~XMLReader();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the graph. </summary>
///
/// <remarks> jcurrey, originally </remarks>
///
/// <returns> null if it fails, else the graph. </returns>
///-------------------------------------------------------------------------------------------------
Graph * GetGraph();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a port. </summary>
///
/// <remarks> jcurrey, originally </remarks>
///
/// <param name="portUID"> The port UID. </param>
///
/// <returns> null if it fails, else the port. </returns>
///-------------------------------------------------------------------------------------------------
Port * GetPort(UINT portUID);
///-------------------------------------------------------------------------------------------------
/// <summary> Reads a graph. </summary>
///
/// <remarks> jcurrey, originally. </remarks>
///
/// <param name="pGraph"> [in,out] If non-null, the graph. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL ReadGraph(Graph * pGraph);
BOOL ReadStringElement(const char * elementName, std::string& cvalue);
int ReadIntegerElement(const char * elementName);
UINT ReadUINTElement(const char * elementName);
bool ReadBooleanElement(const char * elementName);
protected:
const char* ReadTextElement(const char * elementName);
BOOL ReadTemplates();
BOOL ReadKernels();
BOOL ReadTasks();
BOOL ReadChannels();
BOOL ReadActions();
DatablockTemplate * ReadDatablockTemplate();
CompiledKernel * ReadCompiledKernel(int& kernelID);
Task * ReadTask();
Port * ReadPort();
Channel * ReadChannel();
BOOL ReadNextNode(XmlNodeType requiredType);
BOOL ReadElementStartTag(const char * requiredElementName);
BOOL ReadElementText(const char *& text);
BOOL ReadElementEndTag(const char * requiredElementName);
const wchar_t * AllocWideStringCopy(const char * str);
const char * AllocStringCopy(LPCWSTR strW);
void FreeWideString(const wchar_t * str);
void FreeString(const char * str);
IStream * m_pInFileStream;
IXmlReader * m_pReader;
Graph * m_pGraph;
map<string, DatablockTemplate *> m_templateMap;
map<int, CompiledKernel *> m_kernelMap;
map<UINT, Port *> m_portMap;
std::set<const wchar_t*> m_wAllocs;
std::set<const char*> m_cAllocs;
};
};
#endif
#endif

Просмотреть файл

@ -1,91 +0,0 @@
//--------------------------------------------------------------------------------------
// File: XMLWriter.h
// Copyright (c) Microsoft Corporation. All rights reserved.
//--------------------------------------------------------------------------------------
#ifndef _XMLWRITER_H_
#define _XMLWRITER_H_
#ifdef XMLSUPPORT
#ifdef _DEBUG
#define _CRTDBG_MAP_ALLOC
#include <stdlib.h>
#include <crtdbg.h>
#endif
#include <vector>
#include <map>
#include <ole2.h>
#include <xmllite.h>
#include <stdio.h>
#include <shlwapi.h>
/*#include "graph.h"
#include "datablock.h"
#include "datablocktemplate.h"
#include "CompiledKernel.h"
#include "primitive_types.h"
*/
#include "primitive_types.h"
#include "PTaskRuntime.h"
#include "channel.h"
namespace PTask {
/// <summary> Exception type associated with XMLWriter; adds nothing to std::exception.
/// NOTE(review): presumably thrown by XMLWriter on write failures -- confirm. </summary>
class XMLWriterException: public std::exception {};
/// <summary> Writes PTask objects (graph, templates, kernels, tasks, ports, channels) as XML,
/// using the XmlLite IXmlWriter interface over an IStream.
/// NOTE(review): the exact XML layout is defined by the implementation and is
/// not visible from this header. </summary>
class XMLWriter
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> jcurrey, 5/5/2013. </remarks>
///
/// <param name="filename"> The name of the file to write XML to. </param>
///-------------------------------------------------------------------------------------------------
XMLWriter(const char * filename);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> jcurrey, 5/5/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~XMLWriter();
// Low-level output primitives: start tag, element text, end tag, comment, end of document.
void WriteElementStartTag(const char * elementName);
void WriteElementText(const char * text);
void WriteElementEndTag();
void WriteComment(const char * comment);
void WriteEndDocument();
// Writes an element with the given name; overloads take the value as text, int,
// unsigned int or bool.
void WriteElement(const char * elementName, const char * text);
void WriteElement(const char * elementName, int elementValue);
void WriteElement(const char * elementName, unsigned int elementValue);
void WriteElement(const char * elementName, bool elementValue);
// Object writers, one per PTask object type.
// NOTE(review): presumably WriteGraph is the entry point that drives the others -- confirm.
void WriteGraph(Graph * pGraph);
void WriteDatablockTemplate(DatablockTemplate * pTemplate);
void WriteCompiledKernel(CompiledKernel * pCompiledKernel, int kernelID);
void WriteTask(Task * pTask, int kernelID);
void WritePorts(std::map<UINT, Port*>* pPorts);
void WritePort(Port * pPort);
void WriteControlPropagationInfo(Port * pPort);
void WriteChannel(Channel * pChannel);
void WriteChannelEndpointPredication(Channel * pChannel, CHANNELENDPOINTTYPE eEndpoint);
protected:
// Returns a wide-character version of str.
// NOTE(review): ownership/lifetime of the returned buffer is not visible here -- confirm.
const wchar_t * ToWChar(const char * str);
/// <summary> The output file stream. </summary>
IStream * m_pOutFileStream;
/// <summary> The XmlLite writer. </summary>
IXmlWriter * m_pWriter;
};
};
#endif
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,447 +0,0 @@
//--------------------------------------------------------------------------------------
// File: claccelerator.h
// Accelerator built on OpenCL interface
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _CLACCELERATOR_H_
#define _CLACCELERATOR_H_
#ifdef OPENCL_SUPPORT
#include "primitive_types.h"
#include "oclhdr.h"
#include "accelerator.h"
#include <vector>
namespace PTask {
class CLAccelerator : public Accelerator
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="lpszName"> [in,out] If non-null, the name. </param>
///-------------------------------------------------------------------------------------------------
CLAccelerator(char * lpszName);
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="id"> The identifier. </param>
/// <param name="lpszName"> [in,out] If non-null, the name. </param>
///-------------------------------------------------------------------------------------------------
CLAccelerator(cl_device_id id, char * lpszName);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~CLAccelerator();
///-------------------------------------------------------------------------------------------------
/// <summary> Open the OpenCL accelerator. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> . </returns>
///-------------------------------------------------------------------------------------------------
HRESULT Open();
///-------------------------------------------------------------------------------------------------
/// <summary> Open the OpenCL accelerator. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name=""> (optional) the. </param>
///
/// <returns> . </returns>
///-------------------------------------------------------------------------------------------------
HRESULT Open(cl_device_id=NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the device. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> null if it fails, else the device. </returns>
///-------------------------------------------------------------------------------------------------
void* GetDevice();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the context. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> null if it fails, else the context. </returns>
///-------------------------------------------------------------------------------------------------
void* GetContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Creates an asynchronous context for the task. Create the cuda stream for this
/// ptask.
/// </summary>
///
/// <remarks> crossbac, 12/20/2011.
///
/// This method is required of all subclasses, and abstracts the work associated with
/// managing whatever framework-level asynchrony abstractions are supported by the
/// backend target. For example, CUDA supports the "stream", while DirectX supports
/// an ID3D11ImmediateContext, OpenCL has command queues, and so on.
/// </remarks>
///
/// <param name="pTask"> [in] non-null, the CUDA-capable acclerator to which the
/// stream is bound. </param>
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual AsyncContext *
PlatformSpecificCreateAsyncContext(
__in Task * pTask,
__in ASYNCCONTEXTTYPE eAsyncContextType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the OpenCL command queue. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> The queue. </returns>
///-------------------------------------------------------------------------------------------------
cl_command_queue GetQueue();
///-------------------------------------------------------------------------------------------------
/// <summary> Compiles a kernel, identified by source file name and kernel name, for this
/// accelerator. </summary>
///
/// <remarks> Crossbac, 12/20/2011.
///
/// NOTE(review): the roles of the output parameters below are inferred from the
/// documentation of the source-string overload of Compile -- confirm against the
/// implementation.
/// </remarks>
///
/// <param name="lpszFileName"> [in] name of the source file. </param>
/// <param name="lpszKernelName"> [in] name of the kernel within that file. </param>
/// <param name="ppPlatformSpecificBinary"> [out] presumably receives the platform specific binary. </param>
/// <param name="ppPlatformSpecificModule"> [out] presumably receives the platform specific module. </param>
/// <param name="lpszCompilerOutput"> (optional) buffer for compiler output; defaults to NULL. </param>
/// <param name="uiCompilerOutput"> (optional) presumably the length of the compiler
/// output buffer; defaults to 0. </param>
/// <param name="threadGroupSizeX"> (optional) the thread group size x coordinate; defaults to 1. </param>
/// <param name="threadGroupSizeY"> (optional) the thread group size y coordinate; defaults to 1. </param>
/// <param name="threadGroupSizeZ"> (optional) the thread group size z coordinate; defaults to 1. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Compile(
char * lpszFileName,
char * lpszKernelName,
void ** ppPlatformSpecificBinary,
void ** ppPlatformSpecificModule,
char * lpszCompilerOutput=NULL,
int uiCompilerOutput=0,
int threadGroupSizeX=1,
int threadGroupSizeY=1,
int threadGroupSizeZ=1
);
///-------------------------------------------------------------------------------------------------
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
///
/// <remarks> Crossbac, 12/17/2011.
///
/// The function accepts a string of source code and an operation in that source to
/// build a binary for. This is a convenience for source code that may not be stored
/// in files (e.g. dynamically generated code). On success the function will create
/// platform-specific binary and module objects that can be later used by the
/// runtime to invoke the shader code. The caller can provide a buffer for compiler
/// output, which if present, the runtime will fill *iff* the compilation fails.
///
/// NB: Thread group dimensions are optional parameters here. This is because some
/// runtimes require them statically, and some do not. DirectX requires thread-group
/// sizes to be specified statically to enable compiler optimizations that cannot be
/// used otherwise. CUDA and OpenCL allow runtime specification of these parameters.
///
/// NOTE(review): ppPlatformSpecificBinary, ppPlatformSpecificModule and
/// lpszCompilerOutput are documented as outputs but carry the __in annotation in
/// the signature -- confirm which is intended.
/// </remarks>
///
/// <param name="lpszShaderCode"> [in] actual source. cannot be null. </param>
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output.
/// Defaults to NULL. </param>
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
/// compiler output. Defaults to 0. </param>
/// <param name="nThreadGroupSizeX"> (optional) thread group X dimensions; defaults to 1. (see remarks) </param>
/// <param name="nThreadGroupSizeY"> (optional) thread group Y dimensions; defaults to 1. (see remarks) </param>
/// <param name="nThreadGroupSizeZ"> (optional) thread group Z dimensions; defaults to 1. (see remarks) </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
Compile(
__in char * lpszShaderCode,
__in UINT uiShaderCodeSize,
__in char * lpszOperation,
__in void ** ppPlatformSpecificBinary,
__in void ** ppPlatformSpecificModule,
__in char * lpszCompilerOutput=NULL,
__in int uiCompilerOutput=0,
__in int nThreadGroupSizeX=1,
__in int nThreadGroupSizeY=1,
__in int nThreadGroupSizeZ=1
);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether the device context is current. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> NOTE(review): presumably true if the device context is current, false
/// otherwise -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsDeviceContextCurrent();
///-------------------------------------------------------------------------------------------------
/// <summary> Makes the device context current. </summary>
///
/// <remarks> Crossbac, 12/20/2011.
///
/// NOTE(review): presumably paired with ReleaseCurrentDeviceContext -- confirm.
/// </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL MakeDeviceContextCurrent();
///-------------------------------------------------------------------------------------------------
/// <summary> Releases the current device context. </summary>
///
/// <remarks> Crossbac, 12/20/2011.
///
/// NOTE(review): presumably undoes MakeDeviceContextCurrent -- confirm.
/// </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ReleaseCurrentDeviceContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator has some support for device to device transfer
/// with the given accelerator. This allows us to skip a trip through host memory
/// in many cases.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAccelerator"> [in] the other accelerator (the peer of the transfer). </param>
///
/// <returns> true if device to device transfer with pAccelerator is supported,
/// false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsDeviceToDeviceTransfer(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether this accelerator supports device memcpy. </summary>
///
/// <remarks> Crossbac, 7/12/2012. </remarks>
///
/// <returns> NOTE(review): presumably true if device memcpy is supported, false
/// otherwise -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsDeviceMemcpy();
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether this accelerator supports function arguments. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> NOTE(review): presumably true if function arguments are supported, false
/// otherwise -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsFunctionArguments();
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether this accelerator supports by-value arguments. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> NOTE(review): presumably true if by-value arguments are supported, false
/// otherwise -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsByvalArguments();
///-------------------------------------------------------------------------------------------------
/// <summary> Synchronizes the context. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="pTask"> (optional) the task; defaults to NULL.
/// NOTE(review): how a NULL task changes the scope of the
/// synchronization is not visible here -- confirm. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Synchronize(Task*pTask=NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if accelerator 'p' has an accessible memory space. </summary>
///
/// <remarks> Crossbac, 12/20/2011.
///
/// NOTE(review): presumably "accessible" means accessible from this accelerator --
/// confirm against the implementation.
/// </remarks>
///
/// <param name="p"> [in] the accelerator to check. </param>
///
/// <returns> true if accessible memory space, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasAccessibleMemorySpace(Accelerator*p);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether this accelerator supports pinned host memory. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> NOTE(review): presumably true if pinned host memory is supported, false
/// otherwise -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsPinnedHostMemory();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate memory on the host. Some runtimes (esp. earlier versions of CUDA)
/// require that CUDA APIs be used to allocate host-side buffers, or support
/// specialized host allocators that can help improve DMA performance.
/// AllocatePagelockedHostMemory wraps these APIs for accelerators that have runtime support
/// for this, and uses normal system services (VirtualAlloc on Windows, malloc
/// elsewhere) to satisfy requests.
/// </summary>
///
/// <remarks> Crossbac, 12/17/2011.
///
/// NOTE(review): whether this backend ever returns page-locked memory is not
/// visible from the declaration; the answer is reported through pbResultPageLocked.
/// </remarks>
///
/// <param name="uiBytes"> Number of bytes to allocate. </param>
/// <param name="pbResultPageLocked"> [out] If non-null, receives whether the
/// allocated memory is page-locked. </param>
///
/// <returns> byte pointer on success, null on failure. </returns>
///-------------------------------------------------------------------------------------------------
virtual void * AllocatePagelockedHostMemory(UINT uiBytes, BOOL * pbResultPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary> Free host memory. </summary>
///
/// <remarks> Crossbac, 12/17/2011.
///
/// NOTE(review): presumably the counterpart of AllocatePagelockedHostMemory, with
/// bPageLocked carrying the value that call reported -- confirm.
/// </remarks>
///
/// <param name="pBuffer"> The buffer to free. </param>
/// <param name="bPageLocked"> true if the memory was allocated in the page-locked area. </param>
///-------------------------------------------------------------------------------------------------
virtual void
FreeHostMemory(
void * pBuffer,
BOOL bPageLocked
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the OpenCL platform identifier. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> The platform identifier.
/// NOTE(review): presumably m_cpPlatform -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual cl_platform_id GetPlatformId();
///-------------------------------------------------------------------------------------------------
/// <summary> Enumerate accelerators. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="devices"> [in,out] the vector of accelerators.
/// NOTE(review): presumably discovered accelerators are added
/// to it -- confirm against the implementation. </param>
///-------------------------------------------------------------------------------------------------
static void EnumerateAccelerators(std::vector<Accelerator*> &devices);
///-------------------------------------------------------------------------------------------------
/// <summary> Enumerate OpenCL platforms. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="platforms"> [in,out] The vector of platform identifiers.
/// NOTE(review): presumably discovered platforms are added
/// to it -- confirm against the implementation. </param>
///-------------------------------------------------------------------------------------------------
static void EnumeratePlatforms(std::vector<cl_platform_id> &platforms);
protected:
/// <summary> The OpenCL device handle </summary>
cl_device_id m_pDevice;
/// <summary> The OpenCL context handle </summary>
cl_context m_pContext;
/// <summary> The OpenCL command queue </summary>
cl_command_queue m_cqCommandQueue;// OpenCL command queue
/// <summary> The OpenCL platform identifier </summary>
cl_platform_id m_cpPlatform; // OpenCL platform
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a new platform specific buffer. This routine is called by CreateBuffer to
/// get a new instance of whatever buffer type corresponds to the platform
/// implementing this interface. For example, DXAccelerator will return a new
/// PDXBuffer object, where PDXBuffer is a subclass of PBuffer. The Accelerator super-
/// class can then perform the rest of the work required to initialize the PBuffer.
///
/// We only create PBuffers to provide 'physical' views of the 'logical' buffer
/// abstraction provided by the Datablock. Datablocks can have up to three different
/// channels (data, metadata, template), each of which must consequently be
/// backed by its own PBuffer. A PBuffer should not have to know what channel it is
/// backing, but we include that information in its creation to simplify the
/// materialization of views between different subclasses of PBuffer.
///
/// The "proxy allocator" is present as parameter to handle two corner cases:
///
/// 1. Allocation of host-side buffers by the host-specific subclass of PBuffer
/// (PHBuffer)--for example, we prefer to use a CUDA accelerator object to
/// allocate host memory when a block will be touched by a CUDA-based PTask,
/// because we can use the faster async APIs with memory we allocate using CUDA
/// host allocation APIs. This requires that the HostAccelerator defer the host-
/// side memory allocation to the CUDA accelerator.
///
/// 2. Communication between runtimes that provide some interop support (e.g. CUDA
/// and DirectX can actually share texture objects, meaning there is no need to
/// actually allocate a new buffer to back a CUDA view that already has a DirectX
/// view, but the two accelerators must cooperate to assemble a PBuffer that
/// shares the underlying shared object).
///
/// Case 1 is implemented, while case 2 is largely unimplemented. If no proxy
/// accelerator is provided, allocation will proceed using the accelerator object
/// whose member function is being called to allocate the PBuffer.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pLogicalParent"> [in] If non-null, the datablock that is the logical
/// buffer using this 'physical' buffer to back a particular
/// channel on this accelerator. </param>
/// <param name="nDatblockChannelIndex"> Zero-based index of the channel being backed. Must be:
/// * DBDATA_IDX = 0, OR
/// * DBMETADATA_IDX = 1, OR
/// * DBTEMPLATE_IDX = 2. </param>
/// <param name="uiBufferAccessFlags"> Access flags determining what views to create. </param>
/// <param name="pProxyAllocator"> [in] If non-null, the proxy allocator (see summary). </param>
///
/// <returns> null if it fails, else the new platform specific PBuffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual PBuffer* NewPlatformSpecificBuffer(Datablock * pLogicalParent,
UINT nDatblockChannelIndex,
BUFFERACCESSFLAGS uiBufferAccessFlags,
Accelerator * pProxyAllocator
);
};
};
#endif // OPENCL_SUPPORT
#endif

Просмотреть файл

@ -1,269 +0,0 @@
//--------------------------------------------------------------------------------------
// File: CLTask.h
//
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _CL_PTASK_H_
#define _CL_PTASK_H_
#ifdef OPENCL_SUPPORT
#include "primitive_types.h"
#include "cuaccelerator.h"
#include "task.h"
#include "channel.h"
#include "CompiledKernel.h"
#include "oclhdr.h"
#include <map>
#include <vector>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Task subclass for the OpenCL backend. Keeps, per accelerator, the compiled
/// cl_kernel and cl_program objects (see m_pCSMap and m_pModuleMap) together with
/// the preferred thread group geometry.
/// </summary>
///-------------------------------------------------------------------------------------------------
class CLTask : public Task {
    friend class XMLReader;
    friend class XMLWriter;
public:
    ///-------------------------------------------------------------------------------------------------
    /// <summary> Constructor. </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <param name="hRuntimeTerminateEvt"> Handle of the runtime terminate event. </param>
    /// <param name="hGraphTeardownEvent">  Handle of the graph teardown event. </param>
    /// <param name="hGraphStopEvent">      Handle of the graph stop event. </param>
    /// <param name="hGraphRunningEvent">   Handle of the graph running event. </param>
    /// <param name="pCompiledKernel">      The CompiledKernel associated with this task. </param>
    ///-------------------------------------------------------------------------------------------------
    CLTask(
        __in HANDLE hRuntimeTerminateEvt,
        __in HANDLE hGraphTeardownEvent,
        __in HANDLE hGraphStopEvent,
        __in HANDLE hGraphRunningEvent,
        __in CompiledKernel * pCompiledKernel
        );

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Destructor. </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///-------------------------------------------------------------------------------------------------
    virtual ~CLTask();

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Creates this task for the given accelerators. </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <param name="pAccelerators"> [in] the accelerators to compile for. </param>
    /// <param name="pKernel">       [in] the kernel.
    ///                              NOTE(review): null handling is not visible here -- confirm. </param>
    ///
    /// <returns> HRESULT (use SUCCEEDED/FAILED macros) </returns>
    ///-------------------------------------------------------------------------------------------------
    virtual HRESULT Create(std::set<Accelerator*>& pAccelerators,
                           CompiledKernel * pKernel
                           );

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Runs this CLTask. </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <returns> NOTE(review): presumably true if the dispatch succeeds -- confirm. </returns>
    ///-------------------------------------------------------------------------------------------------
    virtual BOOL PlatformSpecificDispatch();

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Sets a compute geometry. </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <param name="tgx"> (optional) the thread group x dimensions; defaults to 1. </param>
    /// <param name="tgy"> (optional) the thread group y dimensions; defaults to 1. </param>
    /// <param name="tgz"> (optional) the thread group z dimensions; defaults to 1. </param>
    ///-------------------------------------------------------------------------------------------------
    virtual void SetComputeGeometry(int tgx=1, int tgy=1, int tgz=1 );

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Sets a block and grid size. </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <param name="grid">  The grid. </param>
    /// <param name="block"> The block. </param>
    ///-------------------------------------------------------------------------------------------------
    virtual void SetBlockAndGridSize(PTASKDIM3 grid, PTASKDIM3 block);

    ///-------------------------------------------------------------------------------------------------
    /// <summary> When the graph is complete, (indicated because Graph.Run was called), this method
    ///           is called on every task to allow tasks to perform any one-time initializations
    ///           that cannot be performed without knowing that the structure of the graph is now
    ///           static. For example, computing parameter offset maps for dispatch.
    /// </summary>
    ///
    /// <remarks> Crossbac, 1/5/2012. </remarks>
    ///-------------------------------------------------------------------------------------------------
    virtual void PlatformSpecificOnGraphComplete();

protected:
    ///-------------------------------------------------------------------------------------------------
    /// <summary> Perform the platform-specific work required to bind an individual input parameter.
    /// </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <param name="pPort">         [in,out] If non-null, the port. </param>
    /// <param name="ordinal">       The ordinal. </param>
    /// <param name="uiActualIndex"> Zero-based actual index.
    ///                              NOTE(review): relation to 'ordinal' is not visible here -- confirm. </param>
    /// <param name="pBuffer">       [in,out] If non-null, the buffer. </param>
    ///
    /// <returns> true if it succeeds, false if it fails. </returns>
    ///-------------------------------------------------------------------------------------------------
    virtual BOOL PlatformSpecificBindInput(Port * pPort,
                                           int ordinal,
                                           UINT uiActualIndex,
                                           PBuffer * pBuffer
                                           );

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Perform the platform-specific work required to bind an individual output
    ///           parameter.
    /// </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <param name="pPort">         [in,out] If non-null, the port. </param>
    /// <param name="ordinal">       The ordinal. </param>
    /// <param name="uiActualIndex"> Zero-based actual index.
    ///                              NOTE(review): relation to 'ordinal' is not visible here -- confirm. </param>
    /// <param name="pBuffer">       [in,out] If non-null, the buffer. </param>
    ///
    /// <returns> true if it succeeds, false if it fails. </returns>
    ///-------------------------------------------------------------------------------------------------
    virtual BOOL PlatformSpecificBindOutput(Port * pPort,
                                            int ordinal,
                                            UINT uiActualIndex,
                                            PBuffer * pBuffer);

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Perform the platform-specific work required to bind an individual constant
    ///           parameter.
    /// </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <param name="pPort">         [in,out] If non-null, the port. </param>
    /// <param name="ordinal">       The ordinal. </param>
    /// <param name="uiActualIndex"> Zero-based actual index.
    ///                              NOTE(review): relation to 'ordinal' is not visible here -- confirm. </param>
    /// <param name="pBuffer">       [in,out] If non-null, the buffer. </param>
    ///
    /// <returns> true if it succeeds, false if it fails. </returns>
    ///-------------------------------------------------------------------------------------------------
    virtual BOOL PlatformSpecificBindConstant(Port * pPort,
                                              int ordinal,
                                              UINT uiActualIndex,
                                              PBuffer * pBuffer
                                              );

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Finalizes the platform specific bindings. </summary>
    ///
    /// <remarks> Crossbac, 1/5/2012. </remarks>
    ///
    /// <returns> true if it succeeds, false if it fails. </returns>
    ///-------------------------------------------------------------------------------------------------
    virtual BOOL PlatformSpecificFinalizeBindings();

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Bind accelerator executable. </summary>
    ///
    /// <remarks> Crossbac, 12/23/2011. </remarks>
    ///
    /// <returns> true if it succeeds, false if it fails. </returns>
    ///-------------------------------------------------------------------------------------------------
    virtual BOOL BindExecutable();

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Bind parameter. </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <param name="pCS">     The OpenCL kernel object. </param>
    /// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
    /// <param name="pPort">   [in,out] If non-null, the port. </param>
    /// <param name="ordinal"> [in,out] The ordinal (passed by reference).
    ///                        NOTE(review): presumably advanced by the call -- confirm. </param>
    ///-------------------------------------------------------------------------------------------------
    void BindParameter(cl_kernel pCS, PBuffer * pBuffer, Port * pPort, int &ordinal);

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Estimates the global size. </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011. </remarks>
    ///
    /// <returns> The estimated global size.
    ///           NOTE(review): units are not visible from the declaration -- confirm. </returns>
    ///-------------------------------------------------------------------------------------------------
    UINT EstimateGlobalSize();

    ///-------------------------------------------------------------------------------------------------
    /// <summary> Loads source code from a file before compiling. </summary>
    ///
    /// <remarks> Crossbac, 12/22/2011.
    ///
    ///           Declared without a "CLTask::" qualifier: a qualified name on an in-class
    ///           member declaration is ill-formed C++ and is rejected by GCC and Clang.
    /// </remarks>
    ///
    /// <param name="cFilename">     Filename of the file. </param>
    /// <param name="cPreamble">     The preamble. </param>
    /// <param name="szFinalLength"> [in,out] If non-null, length of the final source. </param>
    ///
    /// <returns> null if it fails, else the source. </returns>
    ///-------------------------------------------------------------------------------------------------
    char* LoadSource(
        const char* cFilename,
        const char* cPreamble,
        size_t* szFinalLength
        );

    /// <summary> A map from accelerator to compiled kernel object,
    ///           allowing the system to dispatch on arbitrary
    ///           accelerators by selecting the right object
    ///           for the dispatch accelerator.
    /// </summary>
    std::map<Accelerator*, cl_kernel> m_pCSMap;

    /// <summary> A map from accelerator to module,
    ///           allowing the system to dispatch on arbitrary
    ///           accelerators by selecting the right object
    ///           for the dispatch accelerator.
    /// </summary>
    std::map<Accelerator*, cl_program> m_pModuleMap;

    /// <summary> The preferred x thread group size </summary>
    UINT m_nPreferredXDim;

    /// <summary> The preferred y thread group size </summary>
    UINT m_nPreferredYDim;

    /// <summary> The preferred z thread group size </summary>
    UINT m_nPreferredZDim;

    /// <summary> true if the user explicitly set the thread
    ///           group geometry.
    ///           NOTE(review): presumably via SetComputeGeometry -- confirm.
    /// </summary>
    BOOL m_bGeometryExplicit;
};
};
#endif // OPENCL_SUPPORT
#endif // _CLTask_H_

Просмотреть файл

@ -1,835 +0,0 @@
//--------------------------------------------------------------------------------------
// File: cuaccelerator.h
// cuda-based accelerator
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _CUDA_ACCELERATOR_H_
#define _CUDA_ACCELERATOR_H_
#ifdef CUDA_SUPPORT
#include "primitive_types.h"
#include "accelerator.h"
#include "task.h"
#include "cuhdr.h"
#include <vector>
#include <set>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> CUDA specific device attributes. Plain aggregate of per-device properties;
/// flag-like members are stored as int. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
typedef struct cudevparms_t {
/// <summary> The major version number for the device</summary>
int major;
/// <summary> The minor version number for the device</summary>
int minor;
/// <summary> Device id returned by cuda initialization </summary>
CUdevice dev;
/// <summary> The driver version </summary>
int driverVersion;
/// <summary> The total global memory in MB</summary>
size_t totalGlobalMem;
/// <summary> Number of multi processors </summary>
int multiProcessorCount;
/// <summary> The total constant memory in KB</summary>
int totalConstantMemory;
/// <summary> The shared memory per block </summary>
int sharedMemPerBlock;
/// <summary> The number of registers per block </summary>
int regsPerBlock;
/// <summary> Size of the warp </summary>
int warpSize;
/// <summary> The maximum threads per block </summary>
int maxThreadsPerBlock;
/// <summary> The maximum block dimensions (three entries, one per dimension) </summary>
int maxBlockDim[3];
/// <summary> The maximum grid dimensions (three entries, one per dimension) </summary>
int maxGridDim[3];
/// <summary> The memory pitch </summary>
int memPitch;
/// <summary> The texture alignment </summary>
int textureAlign;
/// <summary> The clock rate </summary>
int clockRate;
/// <summary> True if the device can overlap gpu
/// computation with data transfer
/// </summary>
int gpuOverlap;
/// <summary> True if kernel execute timeout is enabled </summary>
int kernelExecTimeoutEnabled;
/// <summary> True if the device is integrated,
/// false if the device is connected on PCIe
/// </summary>
int integrated;
/// <summary> True if the runtime can map host memory
/// for data transfers to/from this device
/// </summary>
int canMapHostMemory;
/// <summary> True if the device supports
/// concurrent execution of multiple different
/// kernels
/// </summary>
int concurrentKernels;
/// <summary> True if ecc is enabled for the device memory </summary>
int eccEnabled;
/// <summary> True if the tcc driver is in use for this device </summary>
int tccDriver;
/// <summary> True if the device supports unified addressing.
/// Unified addressing means device and host pointers are equal
/// for page-locked host-allocations.
/// </summary>
int unifiedAddressing;
/// <summary> Name of the device (fixed 256-byte buffer) </summary>
char deviceName[256];
} CUDA_DEVICE_ATTRIBUTES;
/// <summary> NOTE(review): presumably the maximum number of contexts tracked -- confirm
/// against the uses in CUAccelerator. </summary>
static const int MAXCTXTS = 16;
/// <summary> NOTE(review): presumably the maximum depth of a context stack -- confirm
/// against the uses in CUAccelerator. </summary>
static const int MAXCTXDEPTH = 32;
class CUAccelerator :
public Accelerator
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="attribs"> [in,out] If non-null, the attributes. </param>
///-------------------------------------------------------------------------------------------------
CUAccelerator(CUDA_DEVICE_ATTRIBUTES * attribs);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~CUAccelerator();
///-------------------------------------------------------------------------------------------------
/// <summary> Open the CUAccelerator. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> HRESULT--use SUCCEEDED() and FAILED() macros to check. </returns>
///-------------------------------------------------------------------------------------------------
HRESULT Open();
///-------------------------------------------------------------------------------------------------
/// <summary> Opens a CUAccelerator object for the CUDA
/// device specified. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="dev"> The device id. </param>
///
/// <returns> HRESULT--use SUCCEEDED() and FAILED() macros to check. </returns>
///-------------------------------------------------------------------------------------------------
HRESULT Open(CUdevice dev);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the device. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> null if it fails, else the device. </returns>
///-------------------------------------------------------------------------------------------------
void* GetDevice();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the context. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> null if it fails, else the context. </returns>
///-------------------------------------------------------------------------------------------------
void* GetContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Creates an asynchronous context for the task. Create the cuda stream for this
/// ptask.
/// </summary>
///
/// <remarks> crossbac, 12/20/2011.
///
/// This method is required of all subclasses, and abstracts the work associated with
/// managing whatever framework-level asynchrony abstractions are supported by the
/// backend target. For example, CUDA supports the "stream", while DirectX supports
/// an ID3D11ImmediateContext, OpenCL has command queues, and so on.
/// </remarks>
///
/// <param name="pTask"> [in] non-null, the CUDA-capable acclerator to which the
/// stream is bound. </param>
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual AsyncContext *
PlatformSpecificCreateAsyncContext(
__in Task * pTask,
__in ASYNCCONTEXTTYPE eAsyncContextType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Cache the shader and module objects associated with
/// successful compilation of szFunction in szFile.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="szFile"> [in] non-null, the file name. </param>
/// <param name="szFunction"> [in] non-null, the function. </param>
/// <param name="pCUDAFunction"> The cuda function. </param>
/// <param name="pCUDAModule"> The cuda module. </param>
///-------------------------------------------------------------------------------------------------
void CachePutShader(char * szFile,
char * szFunction,
CUfunction pCUDAFunction,
CUmodule pCUDAModule
);
///-------------------------------------------------------------------------------------------------
/// <summary> Check the cache for a compiled version of the
/// function szFunction in the file szFile. If it
/// is present, compilation can be skipped.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="szFile"> [in] non-null, the file name. </param>
/// <param name="szFunction"> [in] non-null, the function. </param>
/// <param name="pCUDAFunction"> [out] The cuda function. </param>
/// <param name="pCUDAModule"> [out] The cuda module. </param>
///
/// <returns> true if the shader is present in the cache, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL CacheGetShader(char * szFile,
char * szFunction,
CUfunction &pCUDAFunction,
CUmodule &pCUDAModule
);
///-------------------------------------------------------------------------------------------------
/// <summary> Compiles CUDA code to create a new binary
/// and module. </summary>
///
/// <remarks> Crossbac, 12/23/2011.
///
/// NOTE(review): the parameter directions below mirror the documentation of the
/// source-string overload of Compile declared alongside this one; confirm them
/// against the implementation.
/// </remarks>
///
/// <param name="lpszFileName"> [in] filename of the file to compile. </param>
/// <param name="lpszOperation"> [in] the operation (function name) to compile. </param>
/// <param name="ppPlatformSpecificBinary"> [out] On success, the platform specific binary. </param>
/// <param name="ppPlatformSpecificModule"> [out] On success, the platform specific module. </param>
/// <param name="lpszCompilerOutput"> (optional) buffer for the compiler output. </param>
/// <param name="uiCompilerOutput"> (optional) length of the buffer supplied for
/// compiler output. </param>
/// <param name="nThreadGroupSizeX"> (optional) the thread group size x coordinate. </param>
/// <param name="nThreadGroupSizeY"> (optional) the thread group size y coordinate. </param>
/// <param name="nThreadGroupSizeZ"> (optional) the thread group size z coordinate. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Compile(
char * lpszFileName,
char * lpszOperation,
void ** ppPlatformSpecificBinary,
void ** ppPlatformSpecificModule,
char * lpszCompilerOutput=NULL,
int uiCompilerOutput=0,
int nThreadGroupSizeX=1,
int nThreadGroupSizeY=1,
int nThreadGroupSizeZ=1
);
///-------------------------------------------------------------------------------------------------
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
///
/// <remarks> Crossbac, 12/17/2011.
///
/// The function accepts a string of source code and an operation in that source to
/// build a binary for. This is a convenience for source code that may not be stored
/// in files (e.g. dynamically generated code). On success the function will create
/// platform-specific binary and module objects that can be later used by the
/// runtime to invoke the shader code. The caller can provide a buffer for compiler
/// output, which if present, the runtime will fill *iff* the compilation fails.
///
/// NB: Thread group dimensions are optional parameters here. This is because some
/// runtimes require them statically, and some do not. DirectX requires thread-group
/// sizes to be specified statically to enable compiler optimizations that cannot be
/// used otherwise. CUDA and OpenCL allow runtime specification of these parameters.
///
/// NOTE(review): ppPlatformSpecificBinary, ppPlatformSpecificModule and
/// lpszCompilerOutput are annotated __in in the declaration below although they are
/// documented (and described above) as outputs; the annotations look stale and
/// should be reconciled with the documented contract.
/// </remarks>
///
/// <param name="lpszShaderCode"> [in] actual source. cannot be null. </param>
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
/// compiler output. </param>
/// <param name="nThreadGroupSizeX"> (optional) thread group X dimensions. (see remarks) </param>
/// <param name="nThreadGroupSizeY"> (optional) thread group Y dimensions. (see remarks) </param>
/// <param name="nThreadGroupSizeZ"> (optional) thread group Z dimensions. (see remarks) </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
Compile(
__in char * lpszShaderCode,
__in UINT uiShaderCodeSize,
__in char * lpszOperation,
__in void ** ppPlatformSpecificBinary,
__in void ** ppPlatformSpecificModule,
__in char * lpszCompilerOutput=NULL,
__in int uiCompilerOutput=0,
__in int nThreadGroupSizeX=1,
__in int nThreadGroupSizeY=1,
__in int nThreadGroupSizeZ=1
);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if the context of this accelerator is current. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if this accelerator's context is current, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsDeviceContextCurrent();
///-------------------------------------------------------------------------------------------------
/// <summary> Makes the context current. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL MakeDeviceContextCurrent();
///-------------------------------------------------------------------------------------------------
/// <summary> Releases the current context. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ReleaseCurrentDeviceContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Synchronizes the context. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pTask"> (optional) [in] the task, or NULL (the default).
/// NOTE(review): how the task scopes the synchronization is not
/// visible from this declaration -- confirm. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Synchronize(Task*pTask=NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if the cuda runtime has been initialized. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if cuda initialized, false if not. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL IsCUDAInitialized();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the flag recording whether the cuda runtime has been initialized. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="bCUDAInitialized"> true to indicate cuInit has been called. </param>
///-------------------------------------------------------------------------------------------------
static void SetCUDAInitialized(BOOL bCUDAInitialized);
///-------------------------------------------------------------------------------------------------
/// <summary> Device to device transfer. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pDstBuffer"> [in,out] the destination buffer. </param>
/// <param name="pSrcBuffer"> [in] the buffer holding the source data. </param>
/// <param name="pAsyncContext"> [in] the asynchronous context for the transfer. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
DeviceToDeviceTransfer(
__inout PBuffer * pDstBuffer,
__in PBuffer * pSrcBuffer,
__in AsyncContext * pAsyncContext
);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator has support for unified addressing. Unified
/// addressing means there is no distinction between device and host pointers (for
/// page-locked memory). This is important because the datablock abstraction
/// maintains a buffer per logical memory space, and if two memory spaces are
/// logically the same (unified), but only for pointers to page-locked memory, a
/// number of special cases arise for allocation, freeing, ownership, etc. Sadly,
/// this complexity is required in the common case, because asynchronous transfers
/// only work in CUDA when the host pointers are page-locked. We need to be able to
/// tell when a page-locked buffer in the host-memory space is different from a
/// device pointer in a CUAccelerator memory space.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true if the device supports unified addressing. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsUnifiedAddressing();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator has some support for device to device transfer
/// with the given accelerator. This allows us to skip a trip through host memory
/// in many cases.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAccelerator"> [in] the other accelerator. </param>
///
/// <returns> true if device to device transfer with pAccelerator is supported,
/// false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsDeviceToDeviceTransfer(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator supports device memcpy. </summary>
///
/// <remarks> Crossbac, 7/12/2012. </remarks>
///
/// <returns> true if device memcpy is supported, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsDeviceMemcpy();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator supports top-level
/// function arguments. This will always return true
/// for CUDA accelerators.</summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsFunctionArguments();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this platform supports passing
/// structs by value as arguments to top-level kernel
/// entry points. This will always return true for
/// CUDA accelerators.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsByvalArguments();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if 'pOtherAccelerator' has an accessible memory space.
/// The other accelerator's memory space is accessible if there
/// is a way to transfer data between the two other than by
/// copying to host-memory as a waypoint. For example, some
/// CUDA accelerators support peer-to-peer copy over PCI,
/// and DirectX has interop APIs with CUDA.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pOtherAccelerator"> [in,out] If non-null, the other accelerator. </param>
///
/// <returns> true if accessible memory space, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasAccessibleMemorySpace(Accelerator * pOtherAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if the accelerator supports pinned host memory. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if pinned host memory is supported, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsPinnedHostMemory();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate memory on the host. Some runtimes (esp. earlier versions of CUDA)
/// require that CUDA APIs be used to allocate host-side buffers, or support
/// specialized host allocators that can help improve DMA performance.
/// AllocatePagelockedHostMemory wraps these APIs for accelerators that have runtime support
/// for this, and uses normal system services (VirtualAlloc on Windows, malloc
/// elsewhere) to satisfy requests.
/// </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <param name="uiBytes"> Number of bytes to allocate. </param>
/// <param name="pbResultPageLocked"> [out] If non-null, receives whether the allocated
/// memory is page-locked. </param>
///
/// <returns> byte pointer on success, null on failure. </returns>
///-------------------------------------------------------------------------------------------------
virtual void * AllocatePagelockedHostMemory(UINT uiBytes, BOOL * pbResultPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary> Free host memory. </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <param name="pBuffer"> If non-null, the buffer. </param>
/// <param name="bPageLocked"> true if the memory was allocated in the page-locked area. </param>
///-------------------------------------------------------------------------------------------------
virtual void
FreeHostMemory(
void * pBuffer,
BOOL bPageLocked
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the device identifier. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> The device identifier. </returns>
///-------------------------------------------------------------------------------------------------
virtual int GetDeviceId();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the device attributes. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> null if it fails, else the device attributes. </returns>
///-------------------------------------------------------------------------------------------------
CUDA_DEVICE_ATTRIBUTES* GetDeviceAttributes();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator encapsulates a backend framework that provides
/// explicit APIs for managing outstanding (Asynchronous) operations. When this is
/// the case, the corresponding AsyncContext subclass can manage outstanding
/// dependences explicitly to increase concurrency and avoid syncing with the device.
/// When it is *not* the case, we must synchronize when we move data to and from this
/// accelerator context and contexts that *do* support an explicit async API. For
/// example, CUDA supports the stream and event API to explicitly manage dependences
/// and we use this feature heavily to allow task dispatch to get far ahead of device-
/// side dispatch. However when data moves between CUAccelerators and other
/// accelerator classes, we must use synchronous operations or provide a way to wait
/// for outstanding dependences from those contexts to resolve. This method is used
/// to tell us whether we can create an outstanding dependence after making calls
/// that queue work, or whether we need to synchronize.
///
/// This override returns TRUE since this is the CUDA encapsulation class.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true (this is the CUDA encapsulation class). </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsExplicitAsyncOperations();
///-------------------------------------------------------------------------------------------------
/// <summary> Enumerate accelerators. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="devices"> [out] non-null, the accelerator objects supporting CUDA. </param>
///-------------------------------------------------------------------------------------------------
static void EnumerateAccelerators(std::vector<Accelerator*> &devices);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes thread local context management. </summary>
///
/// <remarks> Crossbac, 3/20/2014. </remarks>
///
/// <param name="pDefaultAccelerator"> [in] presumably the accelerator whose device is the
/// default for the thread -- TODO confirm. </param>
/// <param name="eRole"> [in] The role. </param>
/// <param name="bPooledThread"> [in] presumably true if the thread is a thread-pool
/// thread -- TODO confirm. </param>
///-------------------------------------------------------------------------------------------------
static void
InitializeTLSContextManagement(
__in Accelerator * pDefaultAccelerator,
__in PTTHREADROLE eRole,
__in BOOL bPooledThread
);
///-------------------------------------------------------------------------------------------------
/// <summary> Deinitializes thread local context management. Takes no parameters. </summary>
///
/// <remarks> crossbac, 6/17/2014. </remarks>
///-------------------------------------------------------------------------------------------------
static void DeinitializeTLSContextManagement();
///-------------------------------------------------------------------------------------------------
/// <summary> Determines if this accelerator requires thread local context initialization. </summary>
///
/// <remarks> Crossbac, 3/20/2014. </remarks>
///
/// <returns> true if thread local context initialization is required, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL UsesTLSContextManagement();
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the thread local context. </summary>
///
/// <remarks> Crossbac, 3/20/2014. </remarks>
///
/// <param name="eRole"> [in] The role. </param>
/// <param name="bMakeDefault"> [in] This device should be the default for the thread. </param>
/// <param name="bPooledThread"> [in] presumably true if the thread is a thread-pool
/// thread -- TODO confirm. </param>
///-------------------------------------------------------------------------------------------------
virtual void
InitializeTLSContext(
__in PTTHREADROLE eRole,
__in BOOL bMakeDefault,
__in BOOL bPooledThread
);
protected:
void CheckContextInvariants();
void CheckContextTLSInitialized();
/// <summary> true if cuda has been initialized </summary>
static BOOL s_bCUDAInitialized;
/// <summary> context of primary device. This is essentially the "primary" CUDA context, but
/// might not actually be the primary if user code is also managing device contexts
/// or using cuda runtime API calls.
/// </summary>
static CUcontext s_pRootContext;
/// <summary> true if the root context is valid </summary>
static BOOL s_bRootContextValid;
/// <summary> device id for the root context. </summary>
static CUdevice s_nRootContext;
/// <summary> known ptask contexts. these should be created
/// at init time in single threaded code (Scheduler::CreateAccelerators),
/// so we consider them immutable once the runtime is going, so
/// we needn't synchronize them or make them TLS. </summary>
static CUcontext s_vKnownPTaskContexts[MAXCTXTS];
static UINT s_nKnownPTaskContexts;
static CUcontext s_vKnownUserContexts[MAXCTXTS];
static UINT s_nKnownUserContexts;
/// <summary> Thread-local storage for caching device contexts,
/// enabling some heuristics to avoid unnecessary and occasionally
/// expensive calls to cuCtx[Push|Pop]Current.
/// Additional book-keeping is necessary to keep track of
/// contexts that don't belong to us (e.g. "primary" contexts
/// created in user code as a side effect of invoking cuda runtime
/// apis like cudaFree()).
/// </summary>
__declspec(thread) static CUAccelerator * s_pDefaultDeviceCtxt;
__declspec(thread) static CUAccelerator * s_pCurrentDeviceCtxt;
__declspec(thread) static int s_vContextDepthMap[MAXCTXTS];
__declspec(thread) static CUAccelerator ** s_pContextChangeMap[MAXCTXTS];
__declspec(thread) static CUAccelerator * s_vContextChangeMap[MAXCTXTS*MAXCTXDEPTH];
__declspec(thread) static CUcontext s_pUserStackTop;
__declspec(thread) static BOOL s_bContextTLSInit;
__declspec(thread) static BOOL s_bThreadPoolThread;
__declspec(thread) static PTTHREADROLE s_eThreadRole;
static BOOL IsKnownContext(CUcontext ctx);
static BOOL IsKnownContext(CUcontext ctx, CUcontext * pContexts, UINT uiCtxCount);
static BOOL AddKnownContext(CUcontext ctx, CUcontext * pContexts, UINT * puiCtxCount);
static BOOL IsUserContext(CUcontext ctx);
static BOOL IsPTaskContext(CUcontext ctx);
static BOOL IsKnownPTaskContext(CUcontext ctx);
static BOOL IsKnownUserContext(CUcontext ctx);
static BOOL AddKnownPTaskContext(CUcontext ctx);
static BOOL AddKnownUserContext(CUcontext ctx);
static BOOL CheckContextProvenance(CUcontext ctx);
/// <summary> The device </summary>
CUdevice m_pDevice;
/// <summary> The context </summary>
CUcontext m_pContext;
/// <summary> true if this is also an application-level primary context.
/// This means that PTask shares it with user code, does not
/// own the context, and cannot make assumptions about context
/// state on entry to PTask APIs on *application* threads.
/// </summary>
BOOL m_bApplicationPrimaryContext;
/// <summary> Identifier for the device </summary>
int m_nDeviceId;
/// <summary> The device attributes </summary>
CUDA_DEVICE_ATTRIBUTES *m_pDeviceAttributes;
/// <summary> The attributes </summary>
CUDA_DEVICE_ATTRIBUTES m_attrs;
/// <summary> The set of accelerators that are known accessible for P2P transfers. </summary>
std::set<Accelerator*> m_vP2PAccessible;
/// <summary> The set of accelerators that are known enabled for P2P transfers. </summary>
std::set<Accelerator*> m_vP2PEnabled;
/// <summary> The set of accelerators that are *known inaccessible* for P2P transfers. </summary>
std::set<Accelerator*> m_vP2PInaccessible;
/// <summary> The minimum stream priority. </summary>
int m_nMinStreamPriority;
/// <summary> The maximum stream priority. </summary>
int m_nMaxStreamPriority;
/// <summary> The maximum outstanding launches. </summary>
int m_nMaxOutstadingLaunches;
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a new platform specific buffer. This routine is called by CreateBuffer to
/// get a new instance of whatever buffer type corresponds to the platform
/// implementing this interface. For example, DXAccelerator will return a new
/// PDXBuffer object, where PDXBuffer is a subclass of PBuffer. The Accelerator super-
/// class can then perform the rest of the work required to initialize the PBuffer.
///
/// We only create PBuffers to provide 'physical' views of the 'logical' buffer
/// abstraction provided by the Datablock. Datablocks can have up to three different
/// channels (data, metadata, template), each of which must be backed by its own
/// PBuffer. A PBuffer should not have to know what channel it is backing, but we
/// include that information in its creation to simplify the materialization of
/// views between different subclasses of PBuffer.
///
/// The "proxy allocator" is present as a parameter to handle two corner cases:
///
/// 1. Allocation of host-side buffers by the host-specific subclass of PBuffer
/// (PHBuffer)--for example, we prefer to use a CUDA accelerator object to
/// allocate host memory when a block will be touched by a CUDA-based PTask,
/// because we can use the faster async APIs with memory we allocate using CUDA
/// host allocation APIs. This requires that the HostAccelerator defer the host-
/// side memory allocation to the CUDA accelerator.
///
/// 2. Communication between runtimes that provide some interop support (e.g. CUDA
/// and DirectX can actually share texture objects, meaning there is no need to
/// actually allocate a new buffer to back a CUDA view that already has a DirectX
/// view, but the two accelerators must cooperate to assemble a PBuffer that
/// shares the underlying shared object).
///
/// Case 1 is implemented, while case 2 is largely unimplemented. If no proxy
/// accelerator is provided, allocation will proceed using the accelerator object
/// whose member function is being called to allocate the PBuffer.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pLogicalParent"> [in] If non-null, the datablock that is the logical
/// buffer using this 'physical' buffer to back a particular
/// channel on this accelerator. </param>
/// <param name="nDatblockChannelIndex"> Zero-based index of the channel being backed. Must be:
/// * DBDATA_IDX = 0, OR
/// * DBMETADATA_IDX = 1, OR
/// * DBTEMPLATE_IDX = 2. </param>
/// <param name="uiBufferAccessFlags"> Access flags determining what views to create. </param>
/// <param name="pProxyAllocator"> [in,out] If non-null, the proxy allocator. </param>
///
/// <returns> null if it fails, else the new PBuffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual PBuffer* NewPlatformSpecificBuffer(Datablock * pLogicalParent,
UINT nDatblockChannelIndex,
BUFFERACCESSFLAGS uiBufferAccessFlags,
Accelerator * pProxyAllocator
);
///-------------------------------------------------------------------------------------------------
/// <summary> Determine if we should attempt page locked allocation. </summary>
///
/// <remarks> Crossbac, 9/24/2012. </remarks>
///
/// <param name="uiAllocBytes"> The size of the requested allocation, in bytes. </param>
///
/// <returns> true if we should page-lock the requested buffer. </returns>
///-------------------------------------------------------------------------------------------------
BOOL ShouldAttemptPageLockedAllocation(UINT uiAllocBytes);
///-------------------------------------------------------------------------------------------------
/// <summary> Determine if we can access a peer device through explicit peer APIs.
/// Cache the result.
/// </summary>
///
/// <remarks> crossbac, 7/2/2013. </remarks>
///
/// <param name="pAccelerator"> [in] the peer accelerator. </param>
///
/// <returns> true if we can access peer, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL CanAccessPeer(Accelerator * pAccelerator);
friend class PCUBuffer;
};
};
#endif
#endif

Просмотреть файл

@ -1,12 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: cuhdr.h
//
// summary: Includes the CUDA header (cuda.h) when CUDA_SUPPORT is defined;
// declares nothing otherwise.
///-------------------------------------------------------------------------------------------------
#ifndef __CUHDR_H__
#define __CUHDR_H__
#ifdef CUDA_SUPPORT
#include "cuda.h"
#endif // CUDA_SUPPORT
#endif // __CUHDR_H__

Просмотреть файл

@ -1,329 +0,0 @@
//--------------------------------------------------------------------------------------
// File: CUTask.h
// CUDA based task
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _CUDA_TASK_H_
#define _CUDA_TASK_H_
#ifdef CUDA_SUPPORT
#include "primitive_types.h"
#include "accelerator.h"
#include "task.h"
#include "cuhdr.h"
#include <map>
#include <vector>
#include <list>
namespace PTask {
class CompiledKernel;
class CUTask : public Task {
friend class GeometryEstimator;
friend class XMLReader;
friend class XMLWriter;
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="hRuntimeTerminateEvt"> Handle of the global terminate event. </param>
/// <param name="hGraphTeardownEvent"> Handle of the graph teardown event. </param>
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
/// <param name="hGraphRunningEvent"> Handle of the graph running event. </param>
/// <param name="pCompiledKernel"> The CompiledKernel associated with this task. </param>
///-------------------------------------------------------------------------------------------------
CUTask(
__in HANDLE hRuntimeTerminateEvt,
__in HANDLE hGraphTeardownEvent,
__in HANDLE hGraphStopEvent,
__in HANDLE hGraphRunningEvent,
__in CompiledKernel * pCompiledKernel
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~CUTask();
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a PTask. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="pAccelerators"> [in] non-null, set of accelerators this task might run on. </param>
/// <param name="pCompiledKernel"> [in,out] If non-null, the compiled kernel. </param>
///
/// <returns> an HRESULT. NOTE(review): the specific success/failure codes are not visible
/// from this declaration -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual HRESULT Create(
std::set<Accelerator*>& pAccelerators,
CompiledKernel * pCompiledKernel
);
///-------------------------------------------------------------------------------------------------
/// <summary> Runs this ptask. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificDispatch();
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes instrumentation. </summary>
///
/// <remarks> t-nailaf, 06/10/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void InitializeInstrumentation();
///-------------------------------------------------------------------------------------------------
/// <summary> Finalizes instrumentation. </summary>
///
/// <remarks> t-nailaf, 06/10/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void FinalizeInstrumentation();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a compute geometry. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="tgx"> (optional) the thread group X dimensions. </param>
/// <param name="tgy"> (optional) the thread group Y dimensions. </param>
/// <param name="tgz"> (optional) the thread group Z dimensions. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetComputeGeometry(int tgx=1, int tgy=1, int tgz=1 );
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a block and grid size. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="grid"> The grid. </param>
/// <param name="block"> The block. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetBlockAndGridSize(PTASKDIM3 grid, PTASKDIM3 block);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a synchronization timestamp. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="p"> Pointer to an accelerator. NOTE(review): presumably the accelerator whose
/// synchronization timestamp is requested; null handling is not visible
/// here -- confirm. </param>
///
/// <returns> The synchronization timestamp. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetSynchronizationTimestamp(Accelerator * p);
///-------------------------------------------------------------------------------------------------
/// <summary> Increments the synchronization timestamp. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="p"> Pointer to an accelerator. NOTE(review): presumably the accelerator whose
/// synchronization timestamp is incremented; null handling is not visible
/// here -- confirm. </param>
///-------------------------------------------------------------------------------------------------
void IncrementSyncTimestamp(Accelerator * p);
///-------------------------------------------------------------------------------------------------
/// <summary> When the graph is complete (indicated because Graph.Run was called), this method
/// is called on every task to allow tasks to perform any one-time initializations
/// that cannot be performed without knowing that the structure of the graph is now
/// static. For example, computing parameter offset maps for dispatch.
/// </summary>
///
/// <remarks> Crossbac, 1/5/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void PlatformSpecificOnGraphComplete();
///-------------------------------------------------------------------------------------------------
/// <summary> Estimate dispatch dimensions. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void EstimateDispatchDimensions();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the platform-specific work required to bind an individual input parameter.
/// </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pPort"> [in,out] If non-null, the port. </param>
/// <param name="ordinal"> The ordinal. </param>
/// <param name="uiActualIndex"> An unsigned index. NOTE(review): the "ui" prefix presumably
/// denotes the UINT type rather than "user interface"; how this
/// index relates to 'ordinal' is not visible here -- confirm. </param>
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificBindInput(Port * pPort,
int ordinal,
UINT uiActualIndex,
PBuffer * pBuffer
);
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the platform-specific work required to bind an individual output
/// parameter.
/// </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pPort"> [in,out] If non-null, the port. </param>
/// <param name="ordinal"> The ordinal. </param>
/// <param name="uiActualIndex"> An unsigned index. NOTE(review): the "ui" prefix presumably
/// denotes the UINT type rather than "user interface"; how this
/// index relates to 'ordinal' is not visible here -- confirm. </param>
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificBindOutput(Port * pPort,
int ordinal,
UINT uiActualIndex,
PBuffer * pBuffer);
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the platform-specific work required to bind an individual constant
/// parameter. NOTE(review): "constant" is inferred from the method name -- confirm
/// against the implementation.
/// </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pPort"> [in,out] If non-null, the port. </param>
/// <param name="ordinal"> The ordinal. </param>
/// <param name="uiActualIndex"> An unsigned index. NOTE(review): the "ui" prefix presumably
/// denotes the UINT type rather than "user interface"; how this
/// index relates to 'ordinal' is not visible here -- confirm. </param>
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificBindConstant(Port * pPort,
int ordinal,
UINT uiActualIndex,
PBuffer * pBuffer
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific finalization of bindings. NOTE(review): presumably invoked after
/// the individual PlatformSpecificBind* calls -- confirm against the caller.
/// </summary>
///
/// <remarks> Crossbac, 1/5/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificFinalizeBindings();
///-------------------------------------------------------------------------------------------------
/// <summary> Bind accelerator executable. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL BindExecutable();
///-------------------------------------------------------------------------------------------------
/// <summary> Calculates the parameter offsets. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///-------------------------------------------------------------------------------------------------
void ComputeParameterOffsets();
///-------------------------------------------------------------------------------------------------
/// <summary> Adds parameter indices to 'indexmap'. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="portmap"> [in,out] The port map (UINT to Port*), passed by non-const
/// reference. </param>
/// <param name="indexmap"> [in,out] The index map (UINT to Port*), passed by non-const
/// reference. </param>
///-------------------------------------------------------------------------------------------------
void AddParameterIndeces(
std::map<UINT, Port*>& portmap,
std::map<UINT, Port*>& indexmap);
///-------------------------------------------------------------------------------------------------
/// <summary> Collect migration resources. </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="vblocks"> [in,out] The list of datablocks, passed by non-const reference. </param>
/// <param name="vaccs"> [in,out] The list of accelerators, passed by non-const reference. </param>
/// <param name="vstreams"> [in,out] The list of CUDA streams, passed by non-const reference. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL CollectMigrationResources(
std::list<Datablock*> &vblocks,
std::list<Accelerator*> &vaccs,
std::list<CUstream> &vstreams);
///-------------------------------------------------------------------------------------------------
/// <summary> Executes the PS dispatch enter action. NOTE(review): "PS" presumably abbreviates
/// "platform-specific" (cf. PlatformSpecificDispatch) -- confirm. </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <param name="hStream"> The stream. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL OnPSDispatchEnter(CUstream hStream);
///-------------------------------------------------------------------------------------------------
/// <summary> Executes the PS dispatch exit action. NOTE(review): "PS" presumably abbreviates
/// "platform-specific" (cf. PlatformSpecificDispatch) -- confirm. </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <param name="hStream"> The stream. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL OnPSDispatchExit(CUstream hStream);
/// <summary> Map from accelerator to a CUfunction handle. </summary>
std::map<Accelerator*, CUfunction> m_pCSMap;
/// <summary> Map from accelerator to a CUmodule handle. </summary>
std::map<Accelerator*, CUmodule> m_pModuleMap;
/// <summary> Map from port to an unsigned parameter offset. NOTE(review): presumably populated
/// by ComputeParameterOffsets -- confirm. </summary>
std::map<Port*, UINT> m_pParameterOffsets;
/// <summary> The parameter size. NOTE(review): units (presumably bytes) are not visible
/// here -- confirm. </summary>
UINT m_uiParameterSize;
/// <summary> Flag tracking whether parameter offsets are initialized. NOTE(review): presumably
/// refers to m_pParameterOffsets -- confirm. </summary>
BOOL m_bParameterOffsetsInitialized;
/// <summary> Preferred X dimension. NOTE(review): presumably set via SetComputeGeometry --
/// confirm. </summary>
UINT m_nPreferredXDim;
/// <summary> Preferred Y dimension. </summary>
UINT m_nPreferredYDim;
/// <summary> Preferred Z dimension. </summary>
UINT m_nPreferredZDim;
/// <summary> Flag tracking whether the geometry is explicit. NOTE(review): presumably set when
/// the caller supplies a compute geometry -- confirm. </summary>
BOOL m_bGeometryExplicit;
/// <summary> Flag tracking whether thread block sizes are explicit. NOTE(review): presumably
/// set via SetBlockAndGridSize -- confirm. </summary>
BOOL m_bThreadBlockSizesExplicit;
/// <summary> The thread block size. </summary>
PTASKDIM3 m_pThreadBlockSize;
/// <summary> The grid size. </summary>
PTASKDIM3 m_pGridSize;
/// <summary> CUDA event handle for the start of PS dispatch. NOTE(review): presumably used
/// by OnPSDispatchEnter -- confirm. </summary>
CUevent m_hPSDispatchStart;
/// <summary> CUDA event handle for the end of PS dispatch. NOTE(review): presumably used
/// by OnPSDispatchExit -- confirm. </summary>
CUevent m_hPSDispatchEnd;
/// <summary> Flag tracking whether the PS dispatch event handles are valid. </summary>
BOOL m_bPSDispatchEventsValid;
};
};
#endif // CUDA_SUPPORT
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,591 +0,0 @@
//--------------------------------------------------------------------------------------
// File: datablocktemplate.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _DATABLOCK_TEMPLATE_H_
#define _DATABLOCK_TEMPLATE_H_
#include <stdio.h>
#include <crtdbg.h>
#include <Windows.h>
#include "primitive_types.h"
#include "datablock.h"
#include "ReferenceCounted.h"
using namespace PTask;
///-------------------------------------------------------------------------------------------------
/// <summary> Values that represent the different points in the lifecycle of a datablock
/// where the application context associated with a datablock can be managed
/// via a callback. </summary>
///
/// <remarks> jcurrey, 5/1/2014. </remarks>
///-------------------------------------------------------------------------------------------------
typedef enum applicationcontext_callback_point_t {
/// <summary> Point at which a datablock is created. </summary>
CALLBACKPOINT_CREATE,
/// <summary> Point at which a datablock is cloned. </summary>
CALLBACKPOINT_CLONE,
/// <summary> Point at which a datablock is destroyed. </summary>
CALLBACKPOINT_DESTROY
} APPLICATIONCONTEXTCALLBACKPOINT;
///-------------------------------------------------------------------------------------------------
/// <summary> Function signature of callbacks used to manage the application context associated
/// with datablocks. Callbacks are set on a per-template basis, via
/// DatablockTemplate::SetApplicationContextCallback().
///
/// If eCallbackPoint is CALLBACKPOINT_CREATE or CALLBACKPOINT_DESTROY,
/// ppApplicationContext points to the application context of the datablock being
/// created or destroyed.
///
/// If eCallbackPoint is CALLBACKPOINT_CLONE, ppApplicationContext points to the
/// application context of the datablock clone being created. The application context
/// of the datablock being cloned is accessible via pDatablock.
///
/// pDatablock is provided for information only. None of its state should be modified
/// by the callback.
/// </summary>
///
/// <remarks> jcurrey, 5/1/2014. </remarks>
///
/// <param name="eCallbackPoint"> [in] The point in the datablock's lifecycle at which the callback was called. </param>
/// <param name="pDatablock"> [in] The datablock being created, cloned or destroyed. </param>
/// <param name="ppApplicationContext"> [in,out] The application context to be managed. </param>
///-------------------------------------------------------------------------------------------------
typedef void (__stdcall *LPFNAPPLICATIONCONTEXTCALLBACK)(
__in APPLICATIONCONTEXTCALLBACKPOINT eCallbackPoint,
__in const Datablock * pDatablock,
__inout void ** ppApplicationContext
);
namespace PTask {
class DatablockTemplate : public ReferenceCounted
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="lpszTemplateName"> [in] If non-null, name of the template. </param>
/// <param name="uiElementStride"> [in] The element stride in bytes. </param>
/// <param name="uiElementsX"> [in] Number of elements in X dimension. </param>
/// <param name="uiElementsY"> [in] Number of elements in Y dimension. </param>
/// <param name="uiElementsZ"> [in] Number of elements in Z dimension. </param>
/// <param name="bIsRecordStream"> [in] true if this object is record stream. </param>
/// <param name="bIsByteAddressable"> [in] true if this object is byte addressable. </param>
///-------------------------------------------------------------------------------------------------
DatablockTemplate(
__in char * lpszTemplateName,
__in unsigned int uiElementStride,
__in unsigned int uiElementsX,
__in unsigned int uiElementsY,
__in unsigned int uiElementsZ,
__in bool bIsRecordStream,
__in bool bIsByteAddressable
);
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="lpszTemplateName"> [in] If non-null, name of the template. </param>
/// <param name="uiElementStride"> [in] The element stride in bytes. </param>
/// <param name="uiElementsX"> [in] Number of elements in X dimension. </param>
/// <param name="uiElementsY"> [in] Number of elements in Y dimension. </param>
/// <param name="uiElementsZ"> [in] Number of elements in Z dimension. </param>
/// <param name="uiPitch"> [in] The row pitch. </param>
/// <param name="bIsRecordStream"> [in] true if this object is record stream. </param>
/// <param name="bIsByteAddressable"> [in] true if this object is byte addressable. </param>
///-------------------------------------------------------------------------------------------------
DatablockTemplate(
__in char * lpszTemplateName,
__in unsigned int uiElementStride,
__in unsigned int uiElementsX,
__in unsigned int uiElementsY,
__in unsigned int uiElementsZ,
__in unsigned int uiPitch,
__in bool bIsRecordStream,
__in bool bIsByteAddressable
);
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor taking buffer dimensions as BUFFERDIMENSIONS. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="lpszTemplateName"> [in] If non-null, name of the template. </param>
/// <param name="pBufferDims"> [in] Pointer to the buffer dimensions. NOTE(review):
/// presumably an array of uiNumBufferDims entries --
/// confirm. </param>
/// <param name="uiNumBufferDims"> [in] Number of buffer dimension entries. NOTE(review):
/// presumably the length of pBufferDims -- confirm. </param>
/// <param name="bIsRecordStream"> [in] true if this object is record stream. </param>
/// <param name="bIsByteAddressable"> [in] true if this object is byte addressable. </param>
///-------------------------------------------------------------------------------------------------
DatablockTemplate(
__in char * lpszTemplateName,
__in BUFFERDIMENSIONS * pBufferDims,
__in unsigned int uiNumBufferDims,
__in bool bIsRecordStream,
__in bool bIsByteAddressable
);
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="lpszTemplateName"> [in] If non-null, name of the template. </param>
/// <param name="uiElementStride"> [in] The element stride in bytes. </param>
/// <param name="describedParameterType"> [in] Type of the described parameter. </param>
///-------------------------------------------------------------------------------------------------
DatablockTemplate(
__in char * lpszTemplateName,
__in unsigned int uiElementStride,
__in PTASK_PARM_TYPE describedParameterType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~DatablockTemplate();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the stride. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
///
/// <returns> The stride. </returns>
///-------------------------------------------------------------------------------------------------
unsigned int GetStride(UINT uiChannelIndex=DBDATA_IDX);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the number of elements in X. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
///
/// <returns> The number of elements in X. </returns>
///-------------------------------------------------------------------------------------------------
unsigned int GetXElementCount(UINT uiChannelIndex=DBDATA_IDX);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the number of elements in Y. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
///
/// <returns> The number of elements in Y. </returns>
///-------------------------------------------------------------------------------------------------
unsigned int GetYElementCount(UINT uiChannelIndex=DBDATA_IDX);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the number of elements in Z. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
///
/// <returns> The number of elements in Z. </returns>
///-------------------------------------------------------------------------------------------------
unsigned int GetZElementCount(UINT uiChannelIndex=DBDATA_IDX);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the total number of elements. NOTE(review): presumably counted across all
/// dimensions of the given channel -- confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
///
/// <returns> The total element count. </returns>
///-------------------------------------------------------------------------------------------------
unsigned int GetTotalElementCount(UINT uiChannelIndex=DBDATA_IDX);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the number of elements in the given dimension. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="uiDim"> The dimension to query. NOTE(review): the index convention
/// (presumably zero-based, X/Y/Z) is not visible here --
/// confirm. </param>
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
///
/// <returns> The number of elements in the requested dimension. </returns>
///-------------------------------------------------------------------------------------------------
unsigned int GetDimensionElementCount(UINT uiDim, UINT uiChannelIndex=DBDATA_IDX);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the pitch. </summary>
///
/// <remarks> Crossbac, 2/18/2013. </remarks>
///
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
///
/// <returns> The pitch. </returns>
///-------------------------------------------------------------------------------------------------
unsigned int GetPitch(UINT uiChannelIndex=DBDATA_IDX);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets buffer dimensions. </summary>
///
/// <remarks> Crossbac, 2/18/2013. </remarks>
///
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
///
/// <returns> The buffer dimensions. </returns>
///-------------------------------------------------------------------------------------------------
BUFFERDIMENSIONS GetBufferDimensions(UINT uiChannelIndex=DBDATA_IDX);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets buffer dimensions. </summary>
///
/// <remarks> Crossbac, 2/18/2013. </remarks>
///
/// <param name="dims"> The buffer dimensions, passed by non-const reference. </param>
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
///-------------------------------------------------------------------------------------------------
void SetBufferDimensions(BUFFERDIMENSIONS &dims, UINT uiChannelIndex=DBDATA_IDX);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the datablock byte count. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="nChannelIndex"> (optional) index of the channel; defaults to 0. NOTE(review):
/// sibling accessors default to DBDATA_IDX instead of a literal --
/// confirm the two defaults agree. </param>
///
/// <returns> The datablock byte count. </returns>
///-------------------------------------------------------------------------------------------------
unsigned int GetDatablockByteCount(UINT nChannelIndex=0);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object is byte-addressable. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if byte-addressable, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual bool IsByteAddressable();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object is variable dimensioned. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if variable dimensioned, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual bool IsVariableDimensioned();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets whether the template describes byte addressable blocks. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="bIsByteAddressable"> [in] true if this object is byte addressable. </param>
///-------------------------------------------------------------------------------------------------
virtual void
SetByteAddressable(
__in bool bIsByteAddressable
);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this template describes blocks that
/// comprise a record stream.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if the template indicates a record stream. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL DescribesRecordStream();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this template describes blocks that
/// are used as scalar parameter in kernel functions.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if the template describes scalar parameter blocks. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL DescribesScalarParameter();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the parameter base type. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> The parameter base type. </returns>
///-------------------------------------------------------------------------------------------------
virtual PTASK_PARM_TYPE GetParameterBaseType();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the initial value. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="lpvInitData"> [in] If non-null, pointer to the initial value data. </param>
/// <param name="cbData"> [in] Size of the data. NOTE(review): presumably in bytes
/// (cf. GetInitialValueSizeBytes) -- confirm. </param>
/// <param name="nRecordCount"> [in] Number of records. </param>
/// <param name="bExplicitlyEmpty"> [in] (optional, defaults to FALSE) True if this initializer
/// describes an explicitly empty initial value (0-length). We
/// track this explicitly because creating resources based on such
/// initial values that can actually be bound to device-side
/// execution parameters necessitates the creation of
/// non-zero-size buffers, whose logical length is still 0. Hence,
/// we must decouple the tracking of the "empty" property from
/// whether the init buffer is null or has no length in general.
/// A null initializer does not necessarily mean no initializer
/// has been set! </param>
///-------------------------------------------------------------------------------------------------
virtual void
SetInitialValue(
__in void * lpvInitData,
__in UINT cbData,
__in UINT nRecordCount,
__in BOOL bExplicitlyEmpty=FALSE
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the initial value size. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> The initial value size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetInitialValueSizeBytes();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the number of elements in the initial value. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> The number of elements in the initial value. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetInitialValueElements();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the initial value. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> null if it fails, else the initial value. </returns>
///-------------------------------------------------------------------------------------------------
virtual const void * GetInitialValue();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object has an initial value. </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <returns> true if initial value, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasInitialValue();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object has an initial value that can be recreated easily
/// using a memset (rather than a memcpy). The object is memsettable if it has
/// an initial value whose size is less than 4 bytes, or whose initial value
/// is identical for all elements when the value is interpreted as either a 4-byte
/// int or an unsigned char.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="szPrimitiveSize"> (optional) a primitive size; defaults to 0. NOTE(review): the
/// meaning of the default 0 is not visible in this header --
/// confirm against the implementation. </param>
///
/// <returns> true if the initial value is memsettable, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsInitialValueMemsettable(UINT szPrimitiveSize=0);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object has an initial value that can be recreated easily
/// using a memset (rather than a memcpy), restricted to 8 bit objects.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <returns> true if the initial value is memsettable at 8-bit granularity, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsInitialValueMemsettableD8();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the initial value memset stride. </summary>
///
/// <remarks> crossbac, 7/6/2012. </remarks>
///
/// <returns> The initial value memset stride. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetInitialValueMemsetStride();
///-------------------------------------------------------------------------------------------------
/// <summary> Queries if the initial value for this template is empty. </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <returns> true if an initial value is empty, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsInitialValueEmpty();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the template name. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> The template name. NOTE(review): may presumably be null if no name was
/// supplied -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual char * GetTemplateName();
///-------------------------------------------------------------------------------------------------
/// <summary> Set the application context callback function associated with this
/// datablock template. </summary>
///
/// <remarks> jcurrey, 5/1/2014. </remarks>
///
/// <param name="pCallback"> [in] The callback function to associate with this template. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetApplicationContextCallback(LPFNAPPLICATIONCONTEXTCALLBACK pCallback);
///-------------------------------------------------------------------------------------------------
/// <summary> Get the application context callback function associated with this
/// datablock template. </summary>
///
/// <remarks> jcurrey, 5/1/2014. </remarks>
///
/// <returns> The callback function associated with this template. </returns>
///-------------------------------------------------------------------------------------------------
virtual LPFNAPPLICATIONCONTEXTCALLBACK GetApplicationContextCallback();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Default initialize. </summary>
///
/// <remarks> crossbac, 7/9/2012. </remarks>
///
/// <param name="lpszTemplateName"> [in,out] If non-null, name of the template. </param>
///-------------------------------------------------------------------------------------------------
void DefaultInitialize(char * lpszTemplateName);
/// <summary> true if this template describes a
/// record stream
/// </summary>
bool m_bRecordStream;
/// <summary> true if this template describes
/// byte-addressable datablocks
/// </summary>
bool m_bByteAddressable;
/// <summary> true if this template describes blocks
/// that are used as scalar parameters in
/// kernel invocations </summary>
bool m_bScalarParameter;
/// <summary> The parameter base type</summary>
PTASK_PARM_TYPE m_bParameterBaseType;
/// <summary> The name of datablock template,
/// user-supplied (in a hopefully
/// descriptive way)
/// </summary>
char * m_lpszTemplateName;
#if 0
/// <summary> The stride in bytes of a single
/// element in a block created with
/// this template.
/// </summary>
unsigned int m_uiStride;
/// <summary> The vui channel dimensions. </summary>
unsigned int* m_pChannelDimensions[NUM_DATABLOCK_CHANNELS];
/// <summary> Sizes of the three dimensions of elements in blocks created with this template.
/// </summary>
unsigned int m_vuiDataDimensions[MAX_DATABLOCK_DIMENSIONS];
/// <summary> The vui meta dimensions. </summary>
unsigned int m_vuiMetaDimensions[MAX_DATABLOCK_DIMENSIONS];
/// <summary> The vui template data dimensions. </summary>
unsigned int m_vuiTemplateDataDimensions[MAX_DATABLOCK_DIMENSIONS];
#endif
/// <summary> The channel dimensions, per channel type. </summary>
BUFFERDIMENSIONS m_vChannelDimensions[NUM_DATABLOCK_CHANNELS];
/// <summary> An (optional) initial value. </summary>
void * m_lpvInitialValue;
/// <summary> Size of the initial value buffer if such a buffer is extant. </summary>
UINT m_cbInitialValue;
/// <summary> Number of records in the initial value. Generally speaking this
/// value should be the same as m_cbInitialValue/stride, but we
/// insist on this redundancy to enable sanity checking. </summary>
UINT m_nInitialRecordCount;
/// <summary> true if the initial value is explicitly empty, meaning that a null
/// m_lpvInitialValue pointer or 0-valued m_cbInitialValue does not indicate
/// the absence of an initializer for this template.
/// </summary>
BOOL m_bExplicitlyEmptyInitialValue;
/// <summary> true if we have already checked whether this template
/// has an initial value that can be created with a memset
/// call (rather than a memcpy). </summary>
BOOL m_bMemsetCheckComplete;
/// <summary> true if the initial value can be created with memset.
/// Valid only if m_bMemsetCheckComplete is true.
/// </summary>
BOOL m_bMemsettableInitialValue;
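/// <summary> true if the initial value can be created with memset.
/// NOTE(review): this text was identical to the comment on
/// m_bMemsettableInitialValue; presumably this flag is the
/// byte-granularity (D8) variant, cf. m_ucMemsettableInitialValueD8
/// -- confirm. Valid only if m_bMemsetCheckComplete is true.
/// </summary>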
BOOL m_bMemsettableInitialValueD8;
/// <summary> The memsettable initial value (byte-granularity). </summary>
unsigned char m_ucMemsettableInitialValueD8;
/// <summary> The memset initial value stride. </summary>
UINT m_bMemsetInitialValueStride;
/// <summary> The application context callback. </summary>
LPFNAPPLICATIONCONTEXTCALLBACK m_pApplicationContextCallback;
};
};
#endif

Просмотреть файл

@ -1,132 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: dispatchcounter.h
//
// summary: Declares the dispatchcounter class
///-------------------------------------------------------------------------------------------------
#ifndef _DISPATCH_COUNTER_H_
#define _DISPATCH_COUNTER_H_
#include "primitive_types.h"
#include <iosfwd>
#include <map>
#include <set>
#include <string>
#include <vector>
class CHighResolutionTimer;
class CSharedPerformanceTimer;
namespace PTask {
class Task;
class Port;
class DispatchCounter {
public:

    /// <summary> Creates a dispatch counter for a task. </summary>
    /// <remarks> Crossbac, 7/18/2013. </remarks>
    /// <param name="pTask"> [in,out] If non-null, the task this counter is created for. </param>
    DispatchCounter(Task * pTask);

    /// <summary> Destroys the dispatch counter. </summary>
    /// <remarks> Crossbac, 7/18/2013. </remarks>
    virtual ~DispatchCounter();

    /// <summary> Sets up the invocation-counting diagnostics facility. The facility
    ///           tracks how many times each task is invoked and can optionally
    ///           compare that against a caller-specified expected number, which
    ///           helps to find races or tasks that fire when they should not.
    /// </summary>
    /// <remarks> Crossbac, 2/28/2012. </remarks>
    static void Initialize();

    /// <summary> Tears down the invocation-counting diagnostics facility
    ///           (the counterpart of Initialize).
    /// </summary>
    /// <remarks> Crossbac, 2/28/2012. </remarks>
    static void Deinitialize();

    /// <summary> Dumps the dispatch counts for every task in the graph. </summary>
    /// <remarks> Crossbac, 2/28/2012. </remarks>
    /// <param name="ss"> [in,out] Output stream; presumably where the dump is
    ///                   written -- TODO confirm. </param>
    static void Report(std::ostream& ss);

    /// <summary> Checks the dispatch counts of every task in the graph against
    ///           a prediction.
    /// </summary>
    /// <remarks> Crossbac, 2/28/2012. </remarks>
    /// <param name="pvInvocationCounts"> [in,out] If non-null, the predicted invocation
    ///                                   counts; presumably keyed by task name like
    ///                                   m_vDispatchMap -- TODO confirm. </param>
    /// <returns> true if actual and predicted counts match, false otherwise. </returns>
    static BOOL Verify(std::map<std::string, UINT> * pvInvocationCounts);

    /// <summary> Notes that one dispatch of the task has occurred. </summary>
    /// <remarks> Crossbac, 2/28/2012. </remarks>
    void RecordDispatch();

    /// <summary> Sets how many dispatches are expected for the task. According to
    ///           the original notes, the runtime asserts if the actual number of
    ///           dispatches for the task exceeds this value.
    /// </summary>
    /// <remarks> Crossbac, 2/28/2012. </remarks>
    /// <param name="nDispatchCount"> Number of dispatches. </param>
    void SetExpectedDispatchCount(UINT nDispatchCount);

protected:

    /// <summary> Lock guarding the dispatch count map. </summary>
    static CRITICAL_SECTION m_csDispatchMap;

    /// <summary> Dispatch count per task. The key is the task name so that the
    ///           map is robust to graph deletion/runtime-cleanup. </summary>
    static std::map<std::string, UINT> m_vDispatchMap;

    /// <summary> true once dispatch counting has been initialized. </summary>
    static BOOL m_bDispatchCountingInitialized;

    /// <summary> The task. </summary>
    Task * m_pTask;

    /// <summary> How many dispatches are expected for this task. </summary>
    UINT m_nExpectedDispatches;

    /// <summary> How many times this task has actually been dispatched. </summary>
    UINT m_nActualDispatches;
};
};
#endif

Просмотреть файл

@ -1,695 +0,0 @@
//--------------------------------------------------------------------------------------
// File: dxaccelerator.h
// direct x based accelerator
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _DX_ACCELERATOR_H_
#define _DX_ACCELERATOR_H_
#include "primitive_types.h"
#include "ptdxhdr.h"
#include "datablocktemplate.h"
#include "dxcodecache.h"
#include "accelerator.h"
#include "CompiledKernel.h"
#include <vector>
namespace PTask {
class DXAccelerator : public Accelerator {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///-------------------------------------------------------------------------------------------------
DXAccelerator();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~DXAccelerator();
///-------------------------------------------------------------------------------------------------
/// <summary> Opens this accelerator (no-argument overload of Open).
/// NOTE(review): which adapter/device gets bound is not visible
/// from this declaration -- confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> An HRESULT; presumably S_OK on success and a failure code otherwise,
/// as documented for Open(IDXGIAdapter*, UINT) -- TODO confirm. </returns>
///-------------------------------------------------------------------------------------------------
HRESULT Open();
///-------------------------------------------------------------------------------------------------
/// <summary> Opens a DXAccelerator by
/// associating the DXAccelerator object with an adapter
/// and a live D3D11 device context </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="pAdapter"> [in] If non-null, the adapter. </param>
/// <param name="uiEnumerationIndex"> Zero-based index of the adapter when
/// the OS enumerates it. This is necessary because
/// the D3D11 APIs for creating a device are
/// idiosyncratic in the presence of multiple
/// adapters.</param>
///
/// <returns> S_OK on success, E_FAIL otherwise.
/// Use windows SUCCEEDED() and FAILED() macros </returns>
///-------------------------------------------------------------------------------------------------
HRESULT Open(IDXGIAdapter * pAdapter, UINT uiEnumerationIndex);
///-------------------------------------------------------------------------------------------------
/// <summary> Opens a reference device.
/// Should only be called if the programmer wants to work
/// with the runtime in an environment where no DX11 hardware
/// is present, since the reference device is very very slow.
/// Use PTask::Runtime::SetUseReferenceDevices() to enable
/// this feature.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> . </returns>
///-------------------------------------------------------------------------------------------------
HRESULT OpenReferenceDevice();
///-------------------------------------------------------------------------------------------------
/// <summary> Return a pointer to the platform-specific device object. </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <returns> null if it fails, else the device. </returns>
///-------------------------------------------------------------------------------------------------
void* GetDevice();
///-------------------------------------------------------------------------------------------------
/// <summary> Return a pointer to the platform-specific device context. </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <returns> null if it fails, else the context. </returns>
///-------------------------------------------------------------------------------------------------
void* GetContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Creates an asynchronous context for the task.
/// </summary>
///
/// <remarks> crossbac, 12/20/2011.
///
/// This method is required of all subclasses, and abstracts the work associated with
/// managing whatever framework-level asynchrony abstractions are supported by the
/// backend target. For example, CUDA supports the "stream", while DirectX supports
/// an ID3D11ImmediateContext, OpenCL has command queues, and so on.
///
/// NOTE(review): the previous text spoke of creating "the cuda stream" and of a
/// "CUDA-capable accelerator", which looks copied from the CUDA backend; this
/// class is the DirectX accelerator.
/// </remarks>
///
/// <param name="pTask"> [in] non-null, the task the context is created for. </param>
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
///
/// <returns> The new AsyncContext; presumably null on failure -- TODO confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual AsyncContext *
PlatformSpecificCreateAsyncContext(
__in Task * pTask,
__in ASYNCCONTEXTTYPE eAsyncContextType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Cache the DX objects created when a shader is compiled,
/// so that when subsequent calls are made to compile the
/// same function, we can reuse the existing binaries. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="szFile"> [in] non-null, the file name. </param>
/// <param name="szFunc"> [in] non-null, the function name </param>
/// <param name="p"> [in] non-null, a pointer to a ID3D11ComputeShader. </param>
///-------------------------------------------------------------------------------------------------
void CachePutShader(char * szFile, char * szFunc, ID3D11ComputeShader*p);
///-------------------------------------------------------------------------------------------------
/// <summary> Check the shader cache for an existing binary made from the
/// given HLSL file and function name. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="szFile"> [in] If non-null, the file. </param>
/// <param name="szFunc"> [in] If non-null, the func. </param>
///
/// <returns> null if it fails, else the shader binary. </returns>
///-------------------------------------------------------------------------------------------------
ID3D11ComputeShader* CacheGetShader(char * szFile, char * szFunc);
///-------------------------------------------------------------------------------------------------
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
///
/// <remarks> Crossbac, 12/17/2011.
/// The function accepts a file name and an operation in the file
/// to build a binary for. For example, "foo.hlsl" and "vectoradd" will
/// compile the vectoradd() shader in foo.hlsl. On success the function
/// will create platform-specific binary and module objects that can be
/// later used by the runtime to invoke the shader code. The caller can
/// provide a buffer for compiler output, which if present, the runtime
/// will fill *iff* the compilation fails.
/// ***
/// NB: Thread group dimensions are optional parameters here but
/// *must* be used for optimal performance because DirectX requires
/// statically specified thread group sizes, and the default values
/// of 1, 1, 1 are not likely to be a good performance combination.
/// </remarks>
///
/// <param name="lpszFileName"> [in] filename+path of source. cannot be null.</param>
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null.</param>
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
/// compiler output. </param>
/// <param name="tgx"> (optional) thread group X dimensions. (see remarks)</param>
/// <param name="tgy"> (optional) thread group Y dimensions. (see remarks)</param>
/// <param name="tgz"> (optional) thread group Z dimensions. (see remarks)</param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Compile(
char * lpszFileName,
char * lpszOperation,
void ** ppPlatformSpecificBinary,
void ** ppPlatformSpecificModule,
char * lpszCompilerOutput=NULL,
int uiCompilerOutput=0,
int tgx=1,
int tgy=1,
int tgz=1
);
///-------------------------------------------------------------------------------------------------
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
///
/// <remarks> Crossbac, 12/17/2011.
///
/// The function accepts a string of source code and an operation in that source to
/// build a binary for. This is a convenience for source code that may not be stored
/// in files (e.g. dynamically generated code). On success the function will create
/// platform-specific binary and module objects that can be later used by the
/// runtime to invoke the shader code. The caller can provide a buffer for compiler
/// output, which if present, the runtime will fill *iff* the compilation fails.
///
/// NB: Thread group dimensions are optional parameters here. This is because some
/// runtimes require them statically, and some do not. DirectX requires thread-group
/// sizes to be specified statically to enable compiler optimizations that cannot be
/// used otherwise. CUDA and OpenCL allow runtime specification of these parameters.
///
/// NOTE(review): the parameters described as "(optional)" have no default arguments
/// in this overload, so callers must pass all of them. Also,
/// ppPlatformSpecificBinary and ppPlatformSpecificModule are documented as [out] and
/// lpszCompilerOutput as [in,out], yet all three are annotated __in in the
/// declaration -- confirm which is intended.
/// </remarks>
///
/// <param name="lpszShaderCode"> [in] actual source. cannot be null. </param>
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
/// compiler output. </param>
/// <param name="nThreadGroupSizeX"> (optional) thread group X dimensions. (see remarks) </param>
/// <param name="nThreadGroupSizeY"> (optional) thread group Y dimensions. (see remarks) </param>
/// <param name="nThreadGroupSizeZ"> (optional) thread group Z dimensions. (see remarks) </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
Compile(
__in char * lpszShaderCode,
__in UINT uiShaderCodeSize,
__in char * lpszOperation,
__in void ** ppPlatformSpecificBinary,
__in void ** ppPlatformSpecificModule,
__in char * lpszCompilerOutput,
__in int uiCompilerOutput,
__in int nThreadGroupSizeX,
__in int nThreadGroupSizeY,
__in int nThreadGroupSizeZ
);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator's device context is current. </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <returns> true if the context is current. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsDeviceContextCurrent();
///-------------------------------------------------------------------------------------------------
/// <summary> Makes the context current. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL MakeDeviceContextCurrent();
///-------------------------------------------------------------------------------------------------
/// <summary> Releases the current context. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ReleaseCurrentDeviceContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the D3D feature level for the hardware
/// behind this accelerator object. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> The feature level. </returns>
///-------------------------------------------------------------------------------------------------
virtual D3D_FEATURE_LEVEL GetFeatureLevel();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator has some support for device to device transfer
/// with the given accelerator. This allows us to skip a trip through host memory
/// in many cases.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsDeviceToDeviceTransfer(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Device to device transfer. </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
/// <param name="pSrcBuffer"> [in] If non-null, buffer for source data. </param>
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context; presumably
/// the one the transfer is issued on -- TODO confirm. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
DeviceToDeviceTransfer(
__inout PBuffer * pDstBuffer,
__in PBuffer * pSrcBuffer,
__in AsyncContext * pAsyncContext
);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator supports device memcpy.
/// NOTE(review): exactly what "device memcpy" covers is not visible
/// from this declaration -- confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 7/12/2012. </remarks>
///
/// <returns> true if device memcpy is supported, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsDeviceMemcpy();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if the front-end programming model
/// supports function arguments for top-level kernel
/// invocations. DirectX requires
/// top-level invocations to find their inputs
/// at global scope in constant buffers and
/// *StructuredBuffers, etc. so this function
/// always returns false for this class.
/// </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <returns> FALSE. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsFunctionArguments();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if the underlying platform supports byval arguments
/// for kernel invocations. If the platform does support this,
/// PTask can elide explicit creation and population of
/// buffers to back these arguments, which is a performance
/// win when it is actually supported. DirectX does not
/// support this sort of thing so we always return false.</summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <returns> FALSE </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsByvalArguments();
///-------------------------------------------------------------------------------------------------
/// <summary> Synchronizes the context.
/// We could force a synchronization using
/// ID3D11Device functions (flush, end), but
/// there is no need because any attempt to reference
/// output from a PTask executed by a DXAccelerator will
/// force the completion of any predecessor operations.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011.
/// NOTE(review): an earlier revision of this comment documented a
/// "ctxt" parameter; the signature has no such parameter. </remarks>
///
/// <param name="pTask"> (optional) [in] If non-null, the task. </param>
///
/// <returns> true. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Synchronize(Task*pTask=NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Check whether the given accelerator has a memory
/// space that is accessible from this accelerator without
/// copying explicitly through host memory space. Currently,
/// CUDA interop APIs make it the case that we should be able
/// to migrate between CUDA and DirectX devices without
/// necessarily going through the host.
/// TODO: take advantage of these APIs.
/// </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <param name="p"> [in] non-null, a second accelerator. </param>
///
/// <returns> true if accessible memory space, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasAccessibleMemorySpace(Accelerator*p);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if the runtime for this accelerator
/// supports pinned host memory. DirectX does not expose this
/// functionality through the API, so we always return false
/// from DXAccelerator.
/// </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <returns> false </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsPinnedHostMemory();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate memory on the host. Some runtimes (esp. earlier versions of CUDA)
/// require that CUDA APIs be used to allocate host-side buffers, or support
/// specialized host allocators that can help improve DMA performance.
/// AllocatePagelockedHostMemory wraps these APIs for accelerators that have runtime support
/// for this, and uses normal system services (VirtualAlloc on Windows, malloc
/// elsewhere) to satisfy requests.
/// </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <param name="uiBytes"> Number of bytes to allocate. </param>
/// <param name="pbResultPageLocked"> [in,out] If non-null, the result of whether the
/// allocated memory is page-locked is provided here. </param>
///
/// <returns> byte pointer on success, null on failure. </returns>
///-------------------------------------------------------------------------------------------------
virtual void *
AllocatePagelockedHostMemory(
UINT uiBytes,
BOOL * pbResultPageLocked
);
///-------------------------------------------------------------------------------------------------
/// <summary> Free host memory. </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <param name="pBuffer"> If non-null, the buffer. </param>
/// <param name="bPageLocked"> true if the memory was allocated in the page-locked area. </param>
///-------------------------------------------------------------------------------------------------
virtual void
FreeHostMemory(
void * pBuffer,
BOOL bPageLocked
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the adapter for this accelerator. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> null if it fails, else the adapter. </returns>
///-------------------------------------------------------------------------------------------------
virtual IDXGIAdapter* GetAdapter();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the adapter description. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> null if it fails, else the adapter description. </returns>
///-------------------------------------------------------------------------------------------------
virtual DXGI_ADAPTER_DESC* GetAdapterDesc();
///-------------------------------------------------------------------------------------------------
/// <summary> Enumerate accelerators present on the current machine
/// and populate a vector with opened Accelerator objects.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="candidates"> [in] candidates list to populate </param>
///-------------------------------------------------------------------------------------------------
static void EnumerateAccelerators(std::vector<Accelerator*> &candidates);
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a device. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="pAdapter"> [in,out] If non-null, the adapter. </param>
/// <param name="DriverType"> Type of the driver. </param>
/// <param name="Software"> external software rasterizer (always NULL!). </param>
/// <param name="Flags"> creation flags to pass to DX runtime. </param>
/// <param name="pFeatureLevels"> Acceptable DX feature levels list. </param>
/// <param name="FeatureLevels"> Number of entries in feature levels list. </param>
/// <param name="SDKVersion"> The sdk version. </param>
/// <param name="ppDevice"> [out] If non-null, the device. </param>
/// <param name="pFeatureLevel"> [out] If non-null, the feature level of the device </param>
/// <param name="ppImmediateContext"> [out] If non-null, context for the device. </param>
///
/// <returns> HRESULT--use SUCCEEDED() or FAILED() macros</returns>
///-------------------------------------------------------------------------------------------------
static HRESULT WINAPI CreateDevice(
IDXGIAdapter* pAdapter,
D3D_DRIVER_TYPE DriverType,
HMODULE Software,
UINT32 Flags,
CONST D3D_FEATURE_LEVEL* pFeatureLevels,
UINT FeatureLevels,
UINT32 SDKVersion,
ID3D11Device** ppDevice,
D3D_FEATURE_LEVEL* pFeatureLevel,
ID3D11DeviceContext** ppImmediateContext
);
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator encapsulates a backend framework that provides
/// explicit APIs for managing outstanding (Asynchronous) operations. When this is
/// the case, the corresponding AsyncContext subclass can manage outstanding
/// dependences explicitly to increase concurrency and avoid syncing with the device.
/// When it is *not* the case, we must synchronize when we move data between this
/// accelerator context and contexts that *do* support an explicit async API. For
/// example, CUDA supports the stream and event API to explicitly manage dependences
/// and we use this feature heavily to allow task dispatch to get far ahead of device-
/// side dispatch. However when data moves between CUAccelerators and other
/// accelerator classes, we must use synchronous operations or provide a way to wait
/// for outstanding dependences from those contexts to resolve. This method is used
/// to tell us whether we can create an outstanding dependence after making calls
/// that queue work, or whether we need to synchronize.
///
/// The function is not abstract because most accelerator classes don't support async
/// operations yet. In DirectX it is unnecessary because the DX runtime manages these
/// dependences under the covers, and in OpenCL the API is present, but we do not
/// yet take advantage of it. So it's simpler to override a default implementation
/// that returns FALSE.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <returns> true if explicit async operations are supported, false otherwise. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsExplicitAsyncOperations();
protected:
/// <summary> A cache of compiled shader objects</summary>
DXCodeCache * m_pCache;
/// <summary> The ID3D11Device for this accelerator </summary>
ID3D11Device* m_pDevice;
/// <summary> The device context for this accelerator</summary>
ID3D11DeviceContext* m_pContext;
/// <summary> The 3d feature level of the backing device</summary>
D3D_FEATURE_LEVEL m_d3dFeatureLevel;
/// <summary> The adapter backing this device</summary>
IDXGIAdapter * m_pAdapter;
/// <summary> The description of the adapter provided by the OS</summary>
DXGI_ADAPTER_DESC m_desc;
///-------------------------------------------------------------------------------------------------
/// <summary> Find a shader file to compile. </summary>
///
/// <remarks> Crossbac, 12/19/2011.
/// NOTE(review): strDestPath is named like a destination buffer but is
/// annotated __in_ecount(cchDest) -- confirm whether the path is written
/// to it. </remarks>
///
/// <param name="strDestPath"> shader file path </param>
/// <param name="cchDest"> size of path buffer </param>
/// <param name="strFilename"> file name </param>
///
/// <returns> An HRESULT; presumably to be tested with SUCCEEDED() or FAILED()
/// -- TODO confirm. </returns>
///-------------------------------------------------------------------------------------------------
static HRESULT
FindDXSDKShaderFileCch(
__in_ecount(cchDest) WCHAR* strDestPath,
int cchDest,
__in LPCWSTR strFilename );
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a new platform specific buffer. This routine is called by CreateBuffer to
/// get a new instance of whatever buffer type corresponds to the platform
/// implementing this interface. For example, DXAccelerator will return a new
/// PDXBuffer object, where PDXBuffer is a subclass of PBuffer. The Accelerator super-
/// class can then perform the rest of the work required to initialize the PBuffer.
///
/// We only create PBuffers to provide 'physical' views of the 'logical' buffer
/// abstraction provided by the Datablock. Datablocks can have up to three different
/// channels (data, metadata, template), so consequently, each of which must be
/// backed by its own PBuffer. A PBuffer should not have to know what channel it is
/// backing, but we include that information in it's creation to simplify the
/// materialization of views between different subclasses of PBuffer.
///
/// The "proxy allocator" is present as parameter to handle two corner cases:
///
/// 1. Allocation of host-side buffers by the host-specific subclass of PBuffer
/// (PHBuffer)--for example, we prefer to use a CUDA accelerator object to
/// allocate host memory when a block will be touched by a CUDA-based PTask,
/// because we can use the faster async APIs with memory we allocate using CUDA
/// host allocation APIs. This requires that the HostAccelerator defer the host-
/// side memory allocation to the CUDA accelerator.
///
/// 2. Communication between runtimes that provide some interop support (e.g. CUDA
/// and DirectX can actually share texture objects, meaning there is no need to
/// actually allocate a new buffer to back a CUDA view that already has a DirectX
/// view, but the two accelerators must cooperate to assemble a PBuffer that
/// shares the underlying shared object.
///
/// Case 1 is implemented, while case 2 is largely unimplemented. If no proxy
/// accelerator is provided, allocation will proceed using the accelerator object
/// whose member function is being called to allocate the PBuffer.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pLogicalParent"> [in] If non-null, the datablock that is the logical
/// buffer using this 'physical' buffer to back a particular
/// channel on this accelerator. </param>
/// <param name="nDatblockChannelIndex"> Zero-based index of the channel being backed. Must be:
/// * DBDATA_IDX = 0, OR
/// * DBMETADATA_IDX = 1, OR
/// * DBTEMPLATE_IDX = 2. </param>
/// <param name="uiBufferAccessFlags"> Access flags determining what views to create. </param>
/// <param name="pProxyAllocator"> [in,out] If non-null, the proxy allocator. </param>
///
/// <returns> null if it fails, else. </returns>
///-------------------------------------------------------------------------------------------------
virtual PBuffer* NewPlatformSpecificBuffer(Datablock * pLogicalParent,
UINT nDatblockChannelIndex,
BUFFERACCESSFLAGS uiBufferAccessFlags,
Accelerator * pProxyAllocator
);
///-------------------------------------------------------------------------------------------------
/// <summary> Compile with macros. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="lpszShaderCode"> [in] filename+path of source. cannot be null. </param>
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
/// compiler output. </param>
/// <param name="pMacroDefs"> (optional) the macro defs. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
CompileWithMacros(
__in char * lpszShaderCode,
__in UINT uiShaderCodeSize,
__in char * lpszOperation,
__out void ** ppPlatformSpecificBinary,
__out void ** ppPlatformSpecificModule,
__inout char * lpszCompilerOutput,
__in int uiCompilerOutput,
__in const void * pMacroDefs=NULL // const D3D_SHADER_MACRO*
);
private:
///-------------------------------------------------------------------------------------------------
/// <summary> Warmup pipeline. </summary>
///
/// <remarks> Crossbac, 1/28/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void WarmupPipeline();
/// <summary> true to enable, false to disable code paths that
/// directly leverage DirectX asynchrony. </summary>
static BOOL s_bEnableDirectXAsyncrony;
/// <summary> true to enable, false to disable code paths that
/// try to use resource sharing support in DX11. </summary>
static BOOL s_bEnableDirectXP2PAPIs;
};
};
#endif

Просмотреть файл

@ -1,68 +0,0 @@
//--------------------------------------------------------------------------------------
// File: dxcodecache.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _DX_CODE_CACHE_H_
#define _DX_CODE_CACHE_H_
#include "primitive_types.h"
#include "ptdxhdr.h"
#include <map>
namespace PTask {

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Cache mapping a string key to an ID3D11ComputeShader pointer.
    ///             Only the declarations are visible in this header; how the key is
    ///             derived from the (file, function) pair, and whether the cache owns
    ///             (AddRefs/Releases) the shaders it stores, is decided by the
    ///             implementation file -- NOTE(review): confirm before relying on either.
    /// </summary>
    ///
    /// <remarks>   Crossbac, 12/23/2011. </remarks>
    ///-------------------------------------------------------------------------------------------------
    class DXCodeCache {
    public:

        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Default constructor. </summary>
        ///
        /// <remarks>   Crossbac, 12/23/2011. </remarks>
        ///-------------------------------------------------------------------------------------------------
        DXCodeCache();

        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Destructor. Virtual, so subclasses may be destroyed through a
        ///             DXCodeCache pointer. </summary>
        ///
        /// <remarks>   Crossbac, 12/23/2011. </remarks>
        ///-------------------------------------------------------------------------------------------------
        virtual ~DXCodeCache();

        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Cache get. </summary>
        ///
        /// <remarks>   Crossbac, 12/23/2011. </remarks>
        ///
        /// <param name="szFile">   The shader file name (presumably part of the cache key). </param>
        /// <param name="szFunc">   The shader function name (presumably part of the cache key). </param>
        ///
        /// <returns>   null if it fails, else the cached compute shader. </returns>
        ///-------------------------------------------------------------------------------------------------
        ID3D11ComputeShader* CacheGet(char * szFile, char * szFunc);

        ///-------------------------------------------------------------------------------------------------
        /// <summary>   Cache put. </summary>
        ///
        /// <remarks>   Crossbac, 12/23/2011. </remarks>
        ///
        /// <param name="szFile">   The shader file name (presumably part of the cache key). </param>
        /// <param name="szFunc">   The shader function name (presumably part of the cache key). </param>
        /// <param name="p">        The compute shader to store. </param>
        ///-------------------------------------------------------------------------------------------------
        void CachePut(char * szFile, char * szFunc, ID3D11ComputeShader* p);

    protected:

        /// <summary>   Strict-weak ordering for the cache keys. Compares the keys as
        ///             C strings with strcmp, so ordering stops at the first embedded
        ///             NUL. Arguments are taken by const reference: the comparator is
        ///             invoked many times per map lookup and must not copy the keys. </summary>
        struct ltstr {
            bool operator()(const std::string& s1, const std::string& s2) const {
                return strcmp(s1.c_str(), s2.c_str()) < 0;
            }
        };

        /// <summary>   The cached shaders, ordered by ltstr. </summary>
        std::map<std::string, ID3D11ComputeShader*, ltstr> m_cache;
    };
};
#endif

Просмотреть файл

@ -1,347 +0,0 @@
//--------------------------------------------------------------------------------------
// File: dxtask.h
// directx based task
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _DX_TASK_H_
#define _DX_TASK_H_
#include "primitive_types.h"
#include "ptdxhdr.h"
#include "accelerator.h"
#include "dxaccelerator.h"
#include "task.h"
#include "channel.h"
#include "CompiledKernel.h"
#include <map>
#include <vector>
#include <set>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Task running accelerator code that
/// is supported by the DirectX 11 runtime. Declares the platform-specific
/// hooks (dispatch, parameter binding, unbinding) as virtual members on
/// top of the Task base class, plus the per-accelerator compute shader map
/// and dispatch geometry state. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class DXTask : public Task {
friend class XMLReader;
friend class XMLWriter;
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="hRuntimeTerminateEvt"> Handle of the runtime terminate event. </param>
/// <param name="hGraphTeardownEvent"> Handle of the graph teardown event. </param>
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
/// <param name="hGraphRunningEvent"> Handle of the graph running event. </param>
/// <param name="pCompiledKernel"> The CompiledKernel associated with this task. </param>
///-------------------------------------------------------------------------------------------------
DXTask(
__in HANDLE hRuntimeTerminateEvt,
__in HANDLE hGraphTeardownEvent,
__in HANDLE hGraphStopEvent,
__in HANDLE hGraphRunningEvent,
__in CompiledKernel * pCompiledKernel
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~DXTask();
///-------------------------------------------------------------------------------------------------
/// <summary> Creates this object. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pAccelerators"> [in] The set of accelerators (passed by reference). </param>
/// <param name="pKernel"> [in] If non-null, the kernel. </param>
///
/// <returns> An HRESULT; the success/failure codes are chosen by the implementation,
/// which is not visible in this header. </returns>
///-------------------------------------------------------------------------------------------------
virtual HRESULT
Create(
__in std::set<Accelerator*>& pAccelerators,
__in CompiledKernel * pKernel
);
///-------------------------------------------------------------------------------------------------
/// <summary> Perform platform-specific calls to dispatch the task. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> presumably TRUE on success, FALSE on failure -- confirm against the
/// implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificDispatch();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a compute geometry. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="nThreadGroupsX"> (optional) the thread groups in x; defaults to 1. </param>
/// <param name="nThreadGroupsY"> (optional) the thread groups in y; defaults to 1. </param>
/// <param name="nThreadGroupsZ"> (optional) the thread groups in z; defaults to 1. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetComputeGeometry(int nThreadGroupsX=1, int nThreadGroupsY=1, int nThreadGroupsZ=1);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets a block and grid size. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="grid"> The grid. </param>
/// <param name="block"> The block. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetBlockAndGridSize(PTASKDIM3 grid, PTASKDIM3 block);
///-------------------------------------------------------------------------------------------------
/// <summary> When the graph is complete, (indicated because Graph.Run was called), this method
/// is called on every task to allow tasks to perform any one-time initializations
/// that cannot be performed without knowing that the structure of the graph is now
/// static. For example, computing parameter offset maps for dispatch.
/// </summary>
///
/// <remarks> Crossbac, 1/5/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void PlatformSpecificOnGraphComplete();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the platform-specific work required to bind an
/// individual input parameter. </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pPort"> The port being bound. </param>
/// <param name="ordinal"> The ordinal (passed by value). </param>
/// <param name="uiActualIndex"> The actual index. NOTE(review): relationship to
/// 'ordinal' is not visible in this header. </param>
/// <param name="pBuffer"> The buffer to bind. </param>
///
/// <returns> presumably TRUE on success, FALSE on failure -- confirm against the
/// implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificBindInput(Port * pPort, int ordinal, UINT uiActualIndex, PBuffer * pBuffer);
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the platform-specific work required to bind an
/// individual output parameter. </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pPort"> The port being bound. </param>
/// <param name="ordinal"> The ordinal (passed by value). </param>
/// <param name="uiActualIndex"> The actual index. NOTE(review): relationship to
/// 'ordinal' is not visible in this header. </param>
/// <param name="pBuffer"> The buffer to bind. </param>
///
/// <returns> presumably TRUE on success, FALSE on failure -- confirm against the
/// implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificBindOutput(Port * pPort, int ordinal, UINT uiActualIndex, PBuffer * pBuffer);
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the platform-specific work required to bind an
/// individual constant parameter. </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pPort"> The port being bound. </param>
/// <param name="ordinal"> The ordinal (passed by value). </param>
/// <param name="uiActualIndex"> The actual index. NOTE(review): relationship to
/// 'ordinal' is not visible in this header. </param>
/// <param name="pBuffer"> The buffer to bind. </param>
///
/// <returns> presumably TRUE on success, FALSE on failure -- confirm against the
/// implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificBindConstant(Port * pPort,
int ordinal,
UINT uiActualIndex,
PBuffer * pBuffer
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific step that finalizes bindings. </summary>
///
/// <remarks> Crossbac, 1/5/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificFinalizeBindings();
///-------------------------------------------------------------------------------------------------
/// <summary> Bind shader. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL BindExecutable();
///-------------------------------------------------------------------------------------------------
/// <summary> Unbind shader. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void UnbindExecutable();
///-------------------------------------------------------------------------------------------------
/// <summary> Unbind inputs. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void UnbindInputs();
///-------------------------------------------------------------------------------------------------
/// <summary> Unbind outputs. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void UnbindOutputs();
///-------------------------------------------------------------------------------------------------
/// <summary> Unbind constants. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void UnbindConstants();
///-------------------------------------------------------------------------------------------------
/// <summary> Searches for a channel. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="uiUID"> The uid. </param>
/// <param name="p"> If non-null, the array of channel pointers to search
/// (presumably 'siz' entries long -- confirm). </param>
/// <param name="siz"> The size. </param>
///
/// <returns> null if it fails, else the found channel. </returns>
///-------------------------------------------------------------------------------------------------
Channel * FindChannel(UINT uiUID, Channel ** p, int siz);
///-------------------------------------------------------------------------------------------------
/// <summary> Searches for the index of a given channel. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="uiUID"> The uid. </param>
/// <param name="p"> If non-null, the array of channel pointers to search
/// (presumably 'siz' entries long -- confirm). </param>
/// <param name="siz"> The size. </param>
///
/// <returns> The found channel index. NOTE(review): the value returned when no
/// channel matches is not visible in this header. </returns>
///-------------------------------------------------------------------------------------------------
int FindChannelIndex(UINT uiUID, Channel ** p, int siz);
///-------------------------------------------------------------------------------------------------
/// <summary> Removes the channel. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="index"> Zero-based index of the channel. </param>
/// <param name="p"> [in,out] If non-null, the array of channel pointers. </param>
/// <param name="psiz"> [in,out] If non-null, the size (passed by pointer). </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL RemoveChannel(int index, Channel ** p, UINT * psiz);
///-------------------------------------------------------------------------------------------------
/// <summary> Releases the channels. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="p"> [in,out] If non-null, the array of channel pointers. </param>
/// <param name="psiz"> [in,out] If non-null, the size (passed by pointer). </param>
/// <param name="bDeallocate"> true to deallocate. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL ReleaseChannels(Channel ** p, UINT * psiz, BOOL bDeallocate);
///-------------------------------------------------------------------------------------------------
/// <summary> Estimate dispatch dimensions. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pBlock"> If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
void EstimateDispatchDimensions(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Estimate dispatch dimensions helper function. The three dimensions
/// are passed by reference. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pBlock"> If non-null, the block. </param>
/// <param name="x"> [in,out] The x dimension. </param>
/// <param name="y"> [in,out] The y dimension. </param>
/// <param name="z"> [in,out] The z dimension. </param>
///-------------------------------------------------------------------------------------------------
void __estimateDispatchDimensions(Datablock * pBlock, UINT& x, UINT& y, UINT& z);
/// <summary> The compute shader map: one ID3D11ComputeShader
/// pointer per Accelerator. </summary>
std::map<Accelerator*, ID3D11ComputeShader*> m_pCSMap;
/// <summary> The preferred number of thread
/// groups to spawn in the X dimension
/// </summary>
UINT m_nPreferredXDim;
/// <summary> The preferred number of thread
/// groups to spawn in the Y dimension
/// </summary>
UINT m_nPreferredYDim;
/// <summary> The preferred number of thread
/// groups to spawn in the Z dimension
/// </summary>
UINT m_nPreferredZDim;
/// <summary> true if the compute geometry was
/// explicitly set by a call from a
/// user program. </summary>
BOOL m_bGeometryExplicit;
/// <summary> true if we estimated the
/// geometry based on datablock template
/// or datablock properties.
/// </summary>
BOOL m_bGeometryEstimated;
/// <summary> Platform specific objects: a list of ShaderResourceView
/// pointers, reused for binding inputs on every dispatch.
/// </summary>
ID3D11ShaderResourceView** m_ppInputSRVs;
/// <summary> Platform specific objects: a list of ID3D11UnorderedAccessView
/// pointers, reused for binding outputs on every dispatch.
/// </summary>
ID3D11UnorderedAccessView ** m_ppOutputUAVs;
/// <summary> Platform specific objects: a list of ID3D11Buffer
/// pointers, reused for binding constants on every dispatch.
/// </summary>
ID3D11Buffer** m_ppConstantBuffers;
/// <summary> The P2P dispatch input locks. </summary>
std::set<PBuffer*> m_vP2PDispatchInputLocks;
/// <summary> The P2P dispatch output locks. </summary>
std::set<PBuffer*> m_vP2PDispatchOutputLocks;
};
};
#endif

Просмотреть файл

@ -1,59 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: extremetrace.h
//
// summary: Macros for extreme trace mode
///-------------------------------------------------------------------------------------------------
#ifndef __EXTREME_TRACE_H__
#define __EXTREME_TRACE_H__
#ifdef EXTREME_TRACE
#include "PTaskRuntime.h"

// Size, in chars, of the stack buffer every trace macro formats its message into.
#define MSGSIZE 256

// Extreme-trace macros. Each traceN takes N arguments: for trace() the single
// argument is a plain string; for trace2..trace8 the first argument is a
// printf-style format string and the remaining N-1 are its arguments. When
// PTask::Runtime::g_bExtremeTrace is set, the message is formatted into a
// local buffer and printed prefixed with the calling thread's id.
//
// The arities here MUST match the no-op definitions in the #else branch so
// that a call site compiles identically with and without EXTREME_TRACE.
//
// NOTE(review): each macro expands to a bare `if (...) { ... }` statement, so
// avoid using them as the unbraced body of an if/else.

// Trace a plain string (a newline is appended).
#define trace(x) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, "%s\n", x);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

// Format string x plus one argument.
#define trace2(x, y) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

// Format string x plus two arguments.
#define trace3(x, y, z) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

// Format string x plus three arguments.
#define trace4(x, y, z, w) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

// Format string x plus four arguments.
#define trace5(x, y, z, w, u) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w, u);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

// Format string x plus five arguments.
#define trace6(x, y, z, w, u, t) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w, u, t);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

// Format string x plus six arguments. (Previously this seven-parameter macro
// was misnamed trace8, leaving trace7 undefined in tracing builds.)
#define trace7(x, y, z, w, u, t, r) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w, u, t, r);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

// Format string x plus seven arguments.
#define trace8(x, y, z, w, u, t, r, s) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w, u, t, r, s);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

#else

// Tracing compiled out: every macro expands to nothing, so its arguments are
// never evaluated.
#define trace(x)
#define trace2(x, y)
#define trace3(x, y, z)
#define trace4(x, y, z, w)
#define trace5(x, y, z, w, u)
#define trace6(x, y, z, w, u, v)
#define trace7(x, y, z, w, u, v, r)
#define trace8(x, y, z, w, u, v, r, s)

#endif
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,419 +0,0 @@
//--------------------------------------------------------------------------------------
// File: GraphInputChannel.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _GRAPH_INPUT_CHANNEL_H_
#define _GRAPH_INPUT_CHANNEL_H_
#include <stdio.h>
#include <crtdbg.h>
#include <Windows.h>
#include "datablock.h"
#include "BlockPoolOwner.h"
#include "BlockPool.h"
#include <deque>
namespace PTask {
class GraphInputChannel : public Channel, public BlockPoolOwner {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pGraph"> [in] If non-null, the graph. </param>
/// <param name="pDatablockTemplate"> [in] If non-null, the datablock template. </param>
/// <param name="hRuntimeTerminateEvent"> Handle of the runtime terminate event. </param>
/// <param name="hGraphTeardownEvt"> Handle of the graph teardown event. </param>
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
/// <param name="lpszChannelName"> [in] If non-null, name of the channel. </param>
/// <param name="bHasBlockPool"> Whether the channel has a block pool. </param>
///-------------------------------------------------------------------------------------------------
GraphInputChannel(
__in Graph * pGraph,
__in DatablockTemplate * pDatablockTemplate,
__in HANDLE hRuntimeTerminateEvent,
__in HANDLE hGraphTeardownEvt,
__in HANDLE hGraphStopEvent,
__in char * lpszChannelName,
__in BOOL bHasBlockPool
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
/// streamed. Generally speaking, this is a property of the primitive whose IO
/// resources are being exposed by this port; consequently this property must be set
/// explicitly by the programmer when graph structures that are stateful are
/// constructed. For example, in a sort primitive, the main input can be streamed
/// (broken into multiple blocks) only if there is a merge network downstream of the
/// node performing the sort. Code that feeds the main input port needs to know this
/// to decide whether to grow blocks until all data is present, or to push partial
/// input.
/// </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> true if the port can stream data, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CanStream();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~GraphInputChannel();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object has block pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> true if block pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasBlockPool();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this object is global pool. </summary>
///
/// <remarks> crossbac, 8/30/2013. </remarks>
///
/// <returns> true if global pool, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL BlockPoolIsGlobal();
///-------------------------------------------------------------------------------------------------
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
/// we have an upstream allocator (meta) port, the runtime will not create a block
/// pool for the corresponding output port. This turns out to put device-side
/// allocation on the critical path in some cases, so we provide a way to override
/// that behavior and allow a port to create a pool based on some size hints. When
/// there is a block available with sufficient space in the pool, the meta port can
/// avoid the allocation and draw from the pool.
/// </summary>
///
/// <remarks> Crossbac, 9/25/2012. </remarks>
///
/// <param name="nPoolSize"> Size of the block pool. </param>
/// <param name="nStride"> The stride. </param>
/// <param name="nDataBytes"> The data in bytes. </param>
/// <param name="nMetaBytes"> The meta in bytes. </param>
/// <param name="nTemplateBytes"> The template in bytes. </param>
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
///-------------------------------------------------------------------------------------------------
virtual void
ForceBlockPoolHint(
__in UINT nPoolSize,
__in UINT nStride,
__in UINT nDataBytes,
__in UINT nMetaBytes,
__in UINT nTemplateBytes,
__in BOOL bPageLockHostViews=FALSE,
__in BOOL bEagerDeviceMaterialize=FALSE
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
///
/// Allocation of data-blocks and platform-specific buffers can be a significant
/// latency expense at dispatch time. We can actually preallocate output datablocks
/// and create device- side buffers at graph construction time. For each node in the
/// graph, allocate data blocks on any output ports, and create device-specific
/// buffers for all accelerators capable of executing the node.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPool(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
/// to null, and then release it. </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void
DestroyBlockPool(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
///
/// <remarks> crossbac, 6/18/2013. </remarks>
///
/// <returns> true if a block pool is active, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
IsBlockPoolActive(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the owner name. </summary>
///
/// <remarks> crossbac, 6/18/2013. </remarks>
///
/// <returns> null if it fails, else the owner name. </returns>
///-------------------------------------------------------------------------------------------------
virtual char *
GetPoolOwnerName(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets high water mark. </summary>
///
/// <remarks> crossbac, 6/19/2013. </remarks>
///
/// <returns> The high water mark. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetHighWaterMark();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the total number of blocks owned by the pool. </summary>
///
/// <remarks> crossbac, 6/19/2013. </remarks>
///
/// <returns> The total number of blocks owned by the pool (whether they are queued or not). </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetOwnedBlockCount();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the low water mark. </summary>
///
/// <remarks> crossbac, 6/19/2013. </remarks>
///
/// <returns> The low water mark. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetLowWaterMark();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the currently available count. </summary>
///
/// <remarks> crossbac, 6/19/2013. </remarks>
///
/// <returns> The currently available count. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetAvailableBlockCount();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles only the first pass.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012. </remarks>
///
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
/// allocated in the pool may be required. </param>
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
/// Runtime::GetICBlockPoolSize() will be used to determine the
/// size of the pool. </param>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
AllocateBlockPoolAsync(
__in std::vector<Accelerator*>* pAccelerators,
__in unsigned int uiPoolSize=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
/// Asynchronous version. Only allocates device-space buffers
/// in the first pass. Second pass queues all the copies.
/// This function handles the second pass.
/// </summary>
///
/// <remarks> crossbac, 6/15/2012.
///
/// This function takes no parameters; the accelerator list and pool size
/// are arguments of AllocateBlockPoolAsync (the first pass), not of this call.
/// </remarks>
///
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
/// pooling, return false as well.
/// </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
FinalizeBlockPoolAsync(
VOID
);
///-------------------------------------------------------------------------------------------------
/// <summary> add a new block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void AddNewBlock(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> return a block to the pool. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
virtual void ReturnToPool(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets pool size. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The pool size. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetPoolSize();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets request page locked. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetRequestsPageLocked(BOOL bPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets request page locked. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> The request-page-locked setting. NOTE(review): presumably the value last
/// passed to SetRequestsPageLocked -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL GetRequestsPageLocked();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets pooled block. </summary>
///
/// <remarks> crossbac, 4/29/2013. </remarks>
///
/// <returns> null if it fails, else the pooled block. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * GetPooledBlock();
///-------------------------------------------------------------------------------------------------
/// <summary> Derives an initial value datablock for this channel based on its template,
/// and pushes that datablock into this channel, blocking until there is capacity
/// for an optional timeout in milliseconds. Default timeout is infinite.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF for no
/// timeout. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PushInitializer(DWORD dwTimeout=0xFFFFFFFF);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a destination buffer for a block with an upstream
/// allocator. Succeeds only if the pool happens to have blocks
/// backed by sufficient resources in all channels that are backed.
/// </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <param name="pAccelerator"> (optional) [in] If non-null, the accelerator.
/// Defaults to NULL. </param>
/// <param name="uiDataBytes"> (optional) [in] Defaults to 0. NOTE(review): presumably the
/// number of data-channel bytes the block must provide --
/// confirm against the implementation. </param>
/// <param name="uiMetaBytes"> (optional) [in] Defaults to 0. NOTE(review): presumably the
/// number of metadata-channel bytes required -- confirm. </param>
/// <param name="uiTemplateBytes"> (optional) [in] Defaults to 0. NOTE(review): presumably the
/// number of template-channel bytes required -- confirm. </param>
///
/// <returns> null if it fails, else the destination buffer. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock *
GetBlockFromPool(
__in Accelerator * pAccelerator=NULL,
__in UINT uiDataBytes=0,
__in UINT uiMetaBytes=0,
__in UINT uiTemplateBytes=0
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this channel has downstream writers. An output channel is
/// considered a writer because we must conservatively assume consumed
/// blocks will be written.
/// </summary>
///
/// <remarks> crossbac, 8/15/2013. </remarks>
///
/// <returns> true if downstream writers, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasDownstreamWriters();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
/// for this channel in a way that is consistent with a well-formed graph. Called by
/// CheckSemantics()
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="pos"> [in,out] output string stream. </param>
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
PTask::Graph * pGraph);
/// <summary> The block pool. </summary>
BlockPool * m_pBlockPool;
/// <summary> true if this object has block pool. </summary>
BOOL m_bHasBlockPool;
};
};
#endif

Просмотреть файл

@ -1,167 +0,0 @@
//--------------------------------------------------------------------------------------
// File: GraphOutputChannel.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _GRAPH_OUTPUT_CHANNEL_H_
#define _GRAPH_OUTPUT_CHANNEL_H_
#include <stdio.h>
#include <crtdbg.h>
#include <Windows.h>
#include "datablock.h"
#include <deque>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Graph output channel: a Channel subclass that declares a streaming query,
/// downstream-task and downstream-memory-space enumeration, a downstream-writer
/// query, and a type-specific semantics check.
/// </summary>
///-------------------------------------------------------------------------------------------------
class GraphOutputChannel : public Channel {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pGraph"> [in] If non-null, the graph. </param>
/// <param name="pDatablockTemplate"> [in] If non-null, the datablock template. </param>
/// <param name="hRuntimeTerminateEvent"> [in] Handle of the runtime terminate event. </param>
/// <param name="hGraphTeardownEvt"> [in] Handle of the graph teardown event. </param>
/// <param name="hGraphStopEvent"> [in] Handle of the graph stop event. </param>
/// <param name="lpszChannelName"> [in] If non-null, name of the channel. </param>
/// <param name="bHasBlockPool"> [in] Block pool flag. NOTE(review): presumably TRUE
/// when the channel should have a block pool -- confirm
/// against the constructor body. </param>
///-------------------------------------------------------------------------------------------------
GraphOutputChannel(
__in Graph * pGraph,
__in DatablockTemplate * pDatablockTemplate,
__in HANDLE hRuntimeTerminateEvent,
__in HANDLE hGraphTeardownEvt,
__in HANDLE hGraphStopEvent,
__in char * lpszChannelName,
__in BOOL bHasBlockPool
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
/// streamed. Generally speaking, this is a property of the primitive whose IO
/// resources are being exposed by this port; consequently this property must be set
/// explicitly by the programmer when graph structures that are stateful are
/// constructed. For example, in a sort primitive, the main input can be streamed
/// (broken into multiple blocks) only if there is a merge network downstream of the
/// node performing the sort. Code that feeds the main input port needs to know this
/// to decide whether to grow blocks until all data is present, or to push partial
/// input.
/// </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> true if the port can stream data, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CanStream();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~GraphOutputChannel();
///-------------------------------------------------------------------------------------------------
/// <summary> Populate a set of tasks that are bound to this channel as consumers. Because a
/// channel may be an output channel or a multi-channel, the range of cardinality of
/// this result is [0..n]. Return the number of such tasks. Note that we cache the
/// result of this call: computing it requires a transitive closure over paths that
/// can include multi-channels and in/out routing, which in turn means traversing the
/// graph recursively. Since the result of this traversal cannot change, and the
/// traversal requires locking parts of the graph, we prefer to avoid repeating work
/// to recompute the same result.
/// </summary>
///
/// <remarks> Crossbac, 10/2/2012. </remarks>
///
/// <param name="pvTasks"> [in,out] non-null, the tasks. </param>
///
/// <returns> The number of downstream consuming tasks. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
GetDownstreamTasks(
__inout std::set<Task*>* pvTasks
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets memory spaces downstream of this channel that either *must* consume data
/// that flows through this channel, or *may* consume it. The list is non-trivial
/// because of different channel types and predication. For example, an output
/// channel has no downstream consumers, while a multi-channel can have any number.
/// Enumerating consumers is complicated by the following additional factors:
///
/// 1) The presence of channel predicates can ensure dynamically that a particular
/// bound task never actually consumes a block flowing through it.
///
/// 2) If the channel is bound to In/out ports, then we need to analyze paths of
/// length greater than 1. In fact, we need the transitive closure.
///
/// 3) A task's accelerator class may enable it to be bound to several different
/// accelerators, meaning the list of potential consumers can be greater than 1 even
/// if the channel binding structure is trivial.
///
/// Note that we cache the result of this call: computing it requires a transitive
/// closure over paths that can include multi-channels and in/out routing, which in
/// turn means traversing the graph recursively. Since the result of this traversal
/// cannot change, and the traversal requires locking parts of the graph, we prefer
/// to avoid repeating work to recompute the same result.
/// </summary>
///
/// <remarks> Crossbac, 10/2/2012. </remarks>
///
/// <param name="pvMandatoryAccelerators"> [in,out] If non-null, the mandatory accelerators. </param>
/// <param name="pvPotentialAccelerators"> [in,out] If non-null, the potential accelerators. </param>
///
/// <returns> A BOOL; the downstream memory spaces themselves are delivered through the two
/// set parameters. NOTE(review): the meaning of the BOOL is not visible in this
/// header -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
EnumerateDownstreamMemorySpaces(
__inout std::set<Accelerator*>* pvMandatoryAccelerators,
__inout std::set<Accelerator*>* pvPotentialAccelerators
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this channel has downstream writers. An output channel is
/// considered a writer because we must conservatively assume consumed
/// blocks will be written.
/// </summary>
///
/// <remarks> crossbac, 8/15/2013. </remarks>
///
/// <returns> true if downstream writers, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasDownstreamWriters();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
/// for this channel in a way that is consistent with a well-formed graph. Called by
/// CheckSemantics()
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="pos"> [in,out] output string stream. </param>
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
PTask::Graph * pGraph);
};
};
#endif

Просмотреть файл

@ -1,470 +0,0 @@
//--------------------------------------------------------------------------------------
// File: hostaccelerator.h
// host "accelerator"
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _HOST_ACCELERATOR_H_
#define _HOST_ACCELERATOR_H_
#include "primitive_types.h"
#include "datablocktemplate.h"
#include "dxcodecache.h"
#include "accelerator.h"
#include "CompiledKernel.h"
#include <string>
#include <map>
#include <vector>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Host accelerator.
///
/// The host accelerator provides a way to execute ptask nodes
/// on the CPU. Currently, where an accelerator-based ptask
/// accepts source code and "compiles" a node with the resulting
/// binary, the host accelerator expects a dll with a
///
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class HostAccelerator : public Accelerator {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="cpuid"> The cpuid. </param>
/// <param name="lpszName"> [in,out] If non-null, the name. </param>
///-------------------------------------------------------------------------------------------------
HostAccelerator(int cpuid, char * lpszName);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~HostAccelerator();
///-------------------------------------------------------------------------------------------------
/// <summary> Open the host accelerator. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> An HRESULT. NOTE(review): the specific codes returned are not visible in this
/// header -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
virtual HRESULT Open();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the device. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> null if it fails, else the device. </returns>
///-------------------------------------------------------------------------------------------------
void* GetDevice();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the context. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> null if it fails, else the context. </returns>
///-------------------------------------------------------------------------------------------------
void* GetContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Creates an asynchronous context for the task.
/// NOTE(review): this comment previously continued "Create the cuda stream for this
/// ptask", which appears to be carried over from a CUDA accelerator -- confirm
/// what the host accelerator actually creates.
/// </summary>
///
/// <remarks> crossbac, 12/20/2011.
///
/// This method is required of all subclasses, and abstracts the work associated with
/// managing whatever framework-level asynchrony abstractions are supported by the
/// backend target. For example, CUDA supports the "stream", while DirectX supports
/// an ID3D11ImmediateContext, OpenCL has command queues, and so on.
/// </remarks>
///
/// <param name="pTask"> [in] non-null, the task for which the asynchronous
/// context is created. </param>
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
///
/// <returns> Pointer to the AsyncContext for the task. NOTE(review): the failure value
/// (presumably null) is not visible in this header -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual AsyncContext *
PlatformSpecificCreateAsyncContext(
__in Task * pTask,
__in ASYNCCONTEXTTYPE eAsyncContextType
);
///-------------------------------------------------------------------------------------------------
/// <summary> Cache a binary. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="szFile"> [in,out] If non-null, the file. </param>
/// <param name="szFunc"> [in,out] If non-null, the func. </param>
/// <param name="lpfn"> The lpfn. </param>
/// <param name="hModule"> The module. </param>
///-------------------------------------------------------------------------------------------------
void CachePutShader(char * szFile, char * szFunc, FARPROC lpfn, HMODULE hModule);
///-------------------------------------------------------------------------------------------------
/// <summary> Check the cache for a binary. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="szFile"> [in,out] If non-null, the file. </param>
/// <param name="szFunc"> [in,out] If non-null, the func. </param>
/// <param name="ppFunction"> [in,out] The function. </param>
/// <param name="pModule"> [in,out] The module. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL CacheGetShader(char * szFile, char * szFunc, FARPROC &ppFunction, HMODULE &pModule);
///-------------------------------------------------------------------------------------------------
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
///
/// <remarks> Crossbac, 12/17/2011.
/// </remarks>
///
/// <param name="lpszFileName"> [in] filename+path of source. cannot be null.</param>
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null.</param>
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
/// compiler output. </param>
/// <param name="tgx"> (optional) thread group X dimensions. (see remarks)</param>
/// <param name="tgy"> (optional) thread group Y dimensions. (see remarks)</param>
/// <param name="tgz"> (optional) thread group Z dimensions. (see remarks)</param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Compile(
char * lpszFileName,
char * lpszOperation,
void ** ppPlatformSpecificBinary,
void ** ppPlatformSpecificModule,
char * lpszCompilerOutput=NULL,
int uiCompilerOutput=0,
int tgx=1,
int tgy=1,
int tgz=1
);
///-------------------------------------------------------------------------------------------------
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
///
/// <remarks> Crossbac, 12/17/2011.
///
/// The function accepts a string of source code and an operation in that source to
/// build a binary for.
///
/// Currently, this is not implemented for host tasks because this involves
/// setting up infrastructure to choose a compiler and target a DLL, etc.
///
/// NOTE(review): ppPlatformSpecificBinary and ppPlatformSpecificModule are
/// documented as outputs but annotated __in in the declaration -- confirm which
/// is intended. The thread group parameters refer to these remarks, which say
/// nothing about thread groups.
/// </remarks>
///
/// <param name="lpszShaderCode"> [in] actual source. cannot be null. </param>
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
/// compiler output. </param>
/// <param name="nThreadGroupSizeX"> (optional) thread group X dimensions. Defaults to 1. </param>
/// <param name="nThreadGroupSizeY"> (optional) thread group Y dimensions. Defaults to 1. </param>
/// <param name="nThreadGroupSizeZ"> (optional) thread group Z dimensions. Defaults to 1. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
Compile(
__in char * lpszShaderCode,
__in UINT uiShaderCodeSize,
__in char * lpszOperation,
__in void ** ppPlatformSpecificBinary,
__in void ** ppPlatformSpecificModule,
__in char * lpszCompilerOutput=NULL,
__in int uiCompilerOutput=0,
__in int nThreadGroupSizeX=1,
__in int nThreadGroupSizeY=1,
__in int nThreadGroupSizeZ=1
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the context current. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsDeviceContextCurrent();
///-------------------------------------------------------------------------------------------------
/// <summary> Makes the context current. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL MakeDeviceContextCurrent();
///-------------------------------------------------------------------------------------------------
/// <summary> Releases the current context. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void ReleaseCurrentDeviceContext();
///-------------------------------------------------------------------------------------------------
/// <summary> Return true if this accelerator has some support for device to device transfer
/// with the given accelerator. This allows us to skip a trip through host memory
/// in many cases.
/// </summary>
///
/// <remarks> Crossbac, 5/25/2012. </remarks>
///
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsDeviceToDeviceTransfer(Accelerator * pAccelerator);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the supports device memcpy. </summary>
///
/// <remarks> Crossbac, 7/12/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsDeviceMemcpy();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the supports function arguments. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsFunctionArguments();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the supports byval arguments. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsByvalArguments();
///-------------------------------------------------------------------------------------------------
/// <summary> Synchronizes the context. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pTask"> (optional) [in,out] If non-null, the task. Defaults to NULL. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Synchronize(Task*pTask=NULL);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if 'p' has accessible memory space. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="p"> [in,out] If non-null, the p. </param>
///
/// <returns> true if accessible memory space, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasAccessibleMemorySpace(Accelerator*p);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the supports pinned host memory. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL SupportsPinnedHostMemory();
///-------------------------------------------------------------------------------------------------
/// <summary> Allocate memory on the host. Some runtimes (esp. earlier versions of CUDA)
/// require that CUDA APIs be used to allocate host-side buffers, or support
/// specialized host allocators that can help improve DMA performance.
/// AllocatePagelockedHostMemory wraps these APIs for accelerators that have runtime support
/// for this, and uses normal system services (VirtualAlloc on Windows, malloc
/// elsewhere) to satisfy requests.
/// </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <param name="uiBytes"> Number of bytes to allocate. </param>
/// <param name="pbResultPageLocked"> [in,out] If non-null, the result of whether the
/// allocated memory is page-locked is provided here. </param>
///
/// <returns> byte pointer on success, null on failure. </returns>
///-------------------------------------------------------------------------------------------------
virtual void * AllocatePagelockedHostMemory(UINT uiBytes, BOOL * pbResultPageLocked);
///-------------------------------------------------------------------------------------------------
/// <summary> Free host memory. </summary>
///
/// <remarks> Crossbac, 12/17/2011. </remarks>
///
/// <param name="pBuffer"> If non-null, the buffer. </param>
/// <param name="bPageLocked"> true if the memory was allocated in the page-locked area. </param>
///-------------------------------------------------------------------------------------------------
virtual void
FreeHostMemory(
void * pBuffer,
BOOL bPageLocked
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the device identifier. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> The device identifier. </returns>
///-------------------------------------------------------------------------------------------------
virtual int GetDeviceId();
///-------------------------------------------------------------------------------------------------
/// <summary> Enumerate accelerators. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="devices"> [in,out] The devices vector, passed by reference. </param>
///-------------------------------------------------------------------------------------------------
static void EnumerateAccelerators(std::vector<Accelerator*> &devices);
///-------------------------------------------------------------------------------------------------
/// <summary> Allocates a memory extent. </summary>
///
/// <remarks> Crossbac, 1/6/2012. </remarks>
///
/// <param name="ulNumberOfBytes"> Size of the extent to allocate, in bytes. </param>
/// <param name="ulFlags"> Allocation flags.
/// NOTE(review): the accepted values are not evident from
/// this declaration -- confirm. </param>
///
/// <returns> Pointer to the allocated extent; null if allocation fails. </returns>
///-------------------------------------------------------------------------------------------------
static void * __stdcall AllocateMemoryExtent(ULONG ulNumberOfBytes, ULONG ulFlags);
///-------------------------------------------------------------------------------------------------
/// <summary> Deallocates a memory extent. </summary>
///
/// <remarks> Crossbac, 1/6/2012.
/// NOTE(review): presumably expects a pointer previously returned by
/// AllocateMemoryExtent -- confirm against the implementation. </remarks>
///
/// <param name="pvMemoryExtent"> The memory extent to deallocate. </param>
///-------------------------------------------------------------------------------------------------
static void __stdcall DeallocateMemoryExtent(void* pvMemoryExtent);
protected:
/// <summary> Identifier for the device </summary>
int m_nDeviceId;
/// <summary> The code cache: maps a string key to a function pointer (FARPROC).
/// NOTE(review): what the key encodes is not evident from this header -- confirm.
/// </summary>
std::map<std::string, FARPROC> m_pCodeCache;
/// <summary> The module cache: maps a string key to a module handle (HMODULE).
/// NOTE(review): what the key encodes is not evident from this header -- confirm.
/// </summary>
std::map<std::string, HMODULE> m_pModuleCache;
///-------------------------------------------------------------------------------------------------
/// <summary> Creates a new platform-specific buffer. This routine is called by CreateBuffer to
/// get a new instance of whatever buffer type corresponds to the platform
/// implementing this interface. For example, DXAccelerator will return a new
/// PDXBuffer object, where PDXBuffer is a subclass of PBuffer. The Accelerator
/// superclass can then perform the rest of the work required to initialize the
/// PBuffer.
///
/// We only create PBuffers to provide 'physical' views of the 'logical' buffer
/// abstraction provided by the Datablock. Datablocks can have up to three different
/// channels (data, metadata, template), each of which must be backed by its own
/// PBuffer. A PBuffer should not have to know what channel it is backing, but we
/// include that information in its creation to simplify the materialization of
/// views between different subclasses of PBuffer.
///
/// The "proxy allocator" is present as a parameter to handle two corner cases:
///
/// 1. Allocation of host-side buffers by the host-specific subclass of PBuffer
/// (PHBuffer)--for example, we prefer to use a CUDA accelerator object to
/// allocate host memory when a block will be touched by a CUDA-based PTask,
/// because we can use the faster async APIs with memory we allocate using CUDA
/// host allocation APIs. This requires that the HostAccelerator defer the
/// host-side memory allocation to the CUDA accelerator.
///
/// 2. Communication between runtimes that provide some interop support (e.g. CUDA
/// and DirectX can actually share texture objects, meaning there is no need to
/// actually allocate a new buffer to back a CUDA view that already has a DirectX
/// view, but the two accelerators must cooperate to assemble a PBuffer that
/// shares the underlying shared object).
///
/// Case 1 is implemented, while case 2 is largely unimplemented. If no proxy
/// accelerator is provided, allocation will proceed using the accelerator object
/// whose member function is being called to allocate the PBuffer.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pLogicalParent"> [in] If non-null, the datablock that is the logical
/// buffer using this 'physical' buffer to back a particular
/// channel on this accelerator. </param>
/// <param name="nDatblockChannelIndex"> Zero-based index of the channel being backed. Must be:
/// * DBDATA_IDX = 0, OR
/// * DBMETADATA_IDX = 1, OR
/// * DBTEMPLATE_IDX = 2. </param>
/// <param name="uiBufferAccessFlags"> Access flags determining what views to create. </param>
/// <param name="pProxyAllocator"> [in,out] If non-null, the proxy allocator; if null,
/// allocation proceeds using this accelerator (see the
/// summary). </param>
///
/// <returns> The new PBuffer; null if creation fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual PBuffer* NewPlatformSpecificBuffer(Datablock * pLogicalParent,
UINT nDatblockChannelIndex,
BUFFERACCESSFLAGS uiBufferAccessFlags,
Accelerator * pProxyAllocator
);
};
};
#endif

Просмотреть файл

@ -1,490 +0,0 @@
//--------------------------------------------------------------------------------------
// File: HostTask.h
// Host based task
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _HOST_TASK_H_
#define _HOST_TASK_H_
#include "primitive_types.h"
#include "accelerator.h"
#include "cuaccelerator.h"
#include "task.h"
#include "channel.h"
#include "datablock.h"
#include "CompiledKernel.h"
#include <map>
#include <vector>
#include <list>
// NOTE(review): a using-directive at file scope in a header makes the PTask names visible in
// every translation unit that includes this file. The global-scope declarations below use
// unqualified names such as Datablock, which presumably resolve through this directive --
// confirm before removing or narrowing it.
using namespace PTask;
///-------------------------------------------------------------------------------------------------
/// <summary> Function signature for simple host tasks. </summary>
///
/// <remarks> Crossbac, 5/16/2012. </remarks>
///
/// <param name="nArguments"> Number of arguments.
/// NOTE(review): presumably the length of ppArguments -- confirm. </param>
/// <param name="ppArguments"> Array of untyped argument pointers. </param>
///-------------------------------------------------------------------------------------------------
typedef void (__stdcall *LPFNHOSTTASK)(
UINT nArguments,
void **ppArguments
);
///-------------------------------------------------------------------------------------------------
/// <summary> Function signature for host tasks that have dependences on other accelerators.
/// The BOOL array contains entries which are true if that entry corresponds to an
/// input already materialized on the dependent device, false otherwise. The
/// pvDeviceBindings array contains entries which are meaningful when the entry at
/// the same index in the BOOL array is true, and is a platform-specific device id.
/// Generated code must know how to use these IDs.
/// </summary>
///
/// <remarks> Crossbac, 5/16/2012. </remarks>
///
/// <param name="nArguments"> Number of arguments.
/// NOTE(review): presumably the length of ppArguments,
/// pbIsDependentBinding and pvDeviceBindings -- confirm. </param>
/// <param name="ppArguments"> Array of untyped argument pointers. </param>
/// <param name="pbIsDependentBinding"> The BOOL array described in the summary. </param>
/// <param name="pvDeviceBindings"> The device-binding array described in the summary. </param>
/// <param name="nDeps"> NOTE(review): presumably the number of entries in
/// pDeps -- confirm. </param>
/// <param name="pDeps"> NOTE(review): presumably platform-specific objects for
/// the dependent accelerators -- confirm. </param>
///-------------------------------------------------------------------------------------------------
typedef void (__stdcall *LPFNDEPHOSTTASK)(
UINT nArguments,
void **ppArguments,
BOOL * pbIsDependentBinding,
void ** pvDeviceBindings,
UINT nDeps,
void ** pDeps);
///-------------------------------------------------------------------------------------------------
/// <summary> Defines a structure for providing dependent accelerator context information
/// to a host task. We are moving away from the LPFNHOSTTASK and LPFNDEPHOSTTASK
/// approach because the signature has to change every time there is a new
/// requirement. Using a descriptor struct instead allows us to grow the structure
/// as needed without having to change a bunch of code. </summary>
///
/// <remarks> Crossbac, 2/6/2013. </remarks>
///-------------------------------------------------------------------------------------------------
typedef struct _dependent_context_t {
/*
Member overview (details are in the per-member summaries below):
ppDatablocks, pbIsDependentBinding, pvDeviceBindings: vectors of length nArguments.
pStreams: vector of length nDependentAccelerators.
NOTE(review): an earlier version of this comment used different member names
(pbDependentBindings, pvDependentBindings, nDeps, pDepDevs) and stated that the
per-dependence vectors are "always 1" long for one particular client -- confirm
whether that still holds.
*/
/// <summary> The number of bytes in the dependent context
/// descriptor structure.
/// </summary>
UINT cbDependentContext;
/// <summary> The number of arguments in the task argument list. </summary>
UINT nArguments;
/// <summary> The number of dependent accelerators assigned. </summary>
UINT nDependentAccelerators;
/// <summary> Reserved; pads the four leading UINT members to 16 bytes
/// before the pointer-typed members. </summary>
UINT uiReserved0;
/// <summary> The arguments, to be typecast according to what the
/// task knows implicitly as well as the dependent accelerator
/// binding information provided in the subsequent members
/// of this structure.
/// </summary>
void **ppArguments;
/// <summary> A vector of length nArguments, specifying the datablock
/// that each argument is associated with.
/// </summary>
Datablock ** ppDatablocks;
/// <summary> A vector of BOOL, of length nArguments. If a given member is TRUE, you can expect
/// the data for the argument in question to be pre-materialized in device space.
/// </summary>
BOOL * pbIsDependentBinding;
/// <summary> A vector of length nArguments, whose members can be typecast to platform-specific
/// device objects (e.g. CUdevice): if pbIsDependentBinding[i] is TRUE, then
/// pvDeviceBindings[i] is a valid platform-specific object.
/// </summary>
void ** pvDeviceBindings;
/// <summary> A vector each member of which can be typecast to a platform-specific device
/// object (e.g. type CUdevice).
/// NOTE(review): the original comment gave the length as "nDeps", which is not a
/// member of this structure; presumably nDependentAccelerators -- confirm.
/// </summary>
void ** pDependentDevices;
/// <summary> The streams: a vector of length nDependentAccelerators each member of which can
/// be typecast to a platform-specific asynchronous context object (e.g. type
/// CUstream_t).
/// </summary>
void ** pStreams;
/// <summary> A pointer to the PTask-assigned task name. Enables less ambiguous debug
/// output for graphs that use the same host entry point in multiple
/// places in the graph.
/// </summary>
char * lpszTaskName;
} DEPENDENTCONTEXT, *LPDEPENDENTCONTEXT;
///-------------------------------------------------------------------------------------------------
/// <summary> Function signature for host tasks that have dependences on other accelerators.
/// The structure contains members which allow the task dispatch code to determine
/// whether entries are already materialized on the dependent device, as well as
/// enabling the code to get platform-specific objects such as device ids and stream
/// handles where needed. Generated code must know how to use this structure.
/// Currently, if the task's BindDependentAcceleratorClass member is called
/// with the bRequestPSObjects parameter == TRUE, the code assumes the host task
/// entry point follows this form; otherwise the legacy versions above
/// (LPFNDEPHOSTTASK, LPFNHOSTTASK) are used for backward compatibility.
/// </summary>
///
/// <remarks> Crossbac, 5/16/2012. </remarks>
///-------------------------------------------------------------------------------------------------
typedef void (__stdcall *LPFNDEPHOSTTASKEX)(LPDEPENDENTCONTEXT);
namespace PTask {
/// <summary> Capacity of the fixed-size per-task arrays declared at the end of HostTask
/// (m_ppArgs, m_ppDatablocks, m_ppDeps, m_pbIsDependentBinding,
/// m_pvDeviceBindings, m_ppStreams). </summary>
static const int MAXARGS=64;
///-------------------------------------------------------------------------------------------------
/// <summary> Host-based task: a Task subclass whose dispatch entry points are held as
/// FARPROC function pointers (see m_pCSMap and the PlatformSpecificDispatch*
/// members). </summary>
///-------------------------------------------------------------------------------------------------
class HostTask : public Task {
friend class XMLReader;
friend class XMLWriter;
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="hRuntimeTerminateEvt"> Handle of the runtime terminate event. </param>
/// <param name="hGraphTeardownEvent"> Handle of the graph teardown event. </param>
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
/// <param name="hGraphRunningEvent"> Handle of the graph running event. </param>
/// <param name="pCompiledKernel"> The CompiledKernel associated with this task. </param>
///-------------------------------------------------------------------------------------------------
HostTask(
__in HANDLE hRuntimeTerminateEvt,
__in HANDLE hGraphTeardownEvent,
__in HANDLE hGraphStopEvent,
__in HANDLE hGraphRunningEvent,
__in CompiledKernel * pCompiledKernel
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~HostTask();
///-------------------------------------------------------------------------------------------------
/// <summary> Creates this task. </summary>
///
/// <remarks> Crossbac, 12/23/2011.
/// NOTE(review): std::set is used in this signature, but <set> is not among
/// this header's direct standard includes (<map>, <vector>, <list>); presumably
/// it arrives transitively through a project header -- confirm. </remarks>
///
/// <param name="pAccelerators"> [in,out] The set of accelerators, passed by reference. </param>
/// <param name="pKernel"> [in,out] If non-null, the kernel. </param>
///
/// <returns> An HRESULT.
/// NOTE(review): the specific codes returned are not evident from this
/// declaration -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual HRESULT Create( std::set<Accelerator*>& pAccelerators, CompiledKernel * pKernel );
///-------------------------------------------------------------------------------------------------
/// <summary> Dispatches this task. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> NOTE(review): presumably TRUE on success, FALSE on failure -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificDispatch();
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the compute geometry. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="nThreadGroupsX"> (optional) the thread groups in x; defaults to 1. </param>
/// <param name="nThreadGroupsY"> (optional) the thread groups in y; defaults to 1. </param>
/// <param name="nThreadGroupsZ"> (optional) the thread groups in z; defaults to 1. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetComputeGeometry(int nThreadGroupsX=1, int nThreadGroupsY=1, int nThreadGroupsZ=1);
///-------------------------------------------------------------------------------------------------
/// <summary> Sets the block and grid size. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="grid"> The grid. </param>
/// <param name="block"> The block. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetBlockAndGridSize(PTASKDIM3 grid, PTASKDIM3 block);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets a synchronization timestamp. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="p"> [in,out] If non-null, the accelerator. </param>
///
/// <returns> The synchronization timestamp. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetSynchronizationTimestamp(Accelerator * p);
///-------------------------------------------------------------------------------------------------
/// <summary> Increments the synchronization timestamp. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="p"> [in,out] If non-null, the accelerator. </param>
///-------------------------------------------------------------------------------------------------
void IncrementSyncTimestamp(Accelerator * p);
///-------------------------------------------------------------------------------------------------
/// <summary> When the graph is complete (indicated because Graph.Run was called), this method
/// is called on every task to allow tasks to perform any one-time initializations
/// that cannot be performed without knowing that the structure of the graph is now
/// static. For example, computing parameter offset maps for dispatch.
/// </summary>
///
/// <remarks> Crossbac, 1/5/2012. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void PlatformSpecificOnGraphComplete();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the platform-specific work required to bind an individual input parameter.
/// </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pPort"> [in,out] If non-null, the port. </param>
/// <param name="ordinal"> The ordinal. </param>
/// <param name="uiActualIndex"> Zero-based actual index.
/// NOTE(review): its exact meaning is not evident from
/// this header -- confirm. </param>
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificBindInput(Port * pPort,
int ordinal,
UINT uiActualIndex,
PBuffer * pBuffer
);
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the platform-specific work required to bind an individual output
/// parameter.
/// </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pPort"> [in,out] If non-null, the port. </param>
/// <param name="ordinal"> The ordinal. </param>
/// <param name="uiActualIndex"> Zero-based actual index.
/// NOTE(review): its exact meaning is not evident from
/// this header -- confirm. </param>
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificBindOutput(Port * pPort,
int ordinal,
UINT uiActualIndex,
PBuffer * pBuffer);
///-------------------------------------------------------------------------------------------------
/// <summary> Perform the platform-specific work required to bind an individual constant
/// parameter.
/// </summary>
///
/// <remarks> Crossbac, 12/22/2011. </remarks>
///
/// <param name="pPort"> [in,out] If non-null, the port. </param>
/// <param name="ordinal"> The ordinal. </param>
/// <param name="uiActualIndex"> Zero-based actual index.
/// NOTE(review): its exact meaning is not evident from
/// this header -- confirm. </param>
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificBindConstant(Port * pPort,
int ordinal,
UINT uiActualIndex,
PBuffer * pBuffer
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific finalization of bindings.
/// NOTE(review): what finalization entails is not evident from this header --
/// confirm. </summary>
///
/// <remarks> Crossbac, 1/5/2012. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL PlatformSpecificFinalizeBindings();
///-------------------------------------------------------------------------------------------------
/// <summary> Bind accelerator executable. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL BindExecutable();
///-------------------------------------------------------------------------------------------------
/// <summary> Collect migration resources. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="vblocks"> [in,out] List of datablocks, passed by reference. </param>
/// <param name="vaccs"> [in,out] List of accelerators, passed by reference. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
CollectMigrationResources(
__inout std::list<Datablock*> &vblocks,
__inout std::list<Accelerator*> &vaccs
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific dispatch if the task has no dependences on other accelerators.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pCS"> The function pointer address for dispatch. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
PlatformSpecificDispatchNoDependences(
__in FARPROC pCS
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific dispatch if the task has dependences on other accelerators.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pCS"> The function pointer address for dispatch. </param>
/// <param name="nDeps"> The number of dependent assignments. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
PlatformSpecificDispatchWithDependences(
__in FARPROC pCS,
__in UINT nDeps
);
///-------------------------------------------------------------------------------------------------
/// <summary> Platform-specific dispatch if the task has dependences on other accelerators.
/// This version extends the PlatformSpecificDispatchWithDependences version
/// with the ability to provide other platform-specific objects such as stream
/// handles through a struct/descriptor based interface. Currently, this is
/// called if m_bRequestDependentPSObjects is true; otherwise, the legacy versions
/// are called.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="pCS"> The function pointer address for dispatch. </param>
/// <param name="nDeps"> The number of dependent assignments. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL
PlatformSpecificDispatchWithDependencesEx(
__in FARPROC pCS,
__in UINT nDeps
);
///-------------------------------------------------------------------------------------------------
/// <summary> Executes the platform-specific (PS) dispatch enter action. </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL OnPSDispatchEnter();
///-------------------------------------------------------------------------------------------------
/// <summary> Executes the platform-specific (PS) dispatch exit action. </summary>
///
/// <remarks> crossbac, 8/20/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL OnPSDispatchExit();
/// <summary> map of host-task invocation parameter index to value </summary>
std::map<int, void*> m_pParameters;
/// <summary> map of host-task invocation parameter index to source port </summary>
std::map<int, Port*> m_pParameterPorts;
/// <summary> map of host-task invocation parameter index to datablock </summary>
std::map<int, Datablock*> m_pParameterDatablockMap;
/// <summary> map of accelerator to function pointer </summary>
std::map<Accelerator*, FARPROC> m_pCSMap;
/// <summary> map of accelerator to HMODULE handle </summary>
std::map<Accelerator*, HMODULE> m_pModuleMap;
/// <summary> The preferred x size </summary>
UINT m_nPreferredXDim;
/// <summary> The preferred y size </summary>
UINT m_nPreferredYDim;
/// <summary> The preferred z size </summary>
UINT m_nPreferredZDim;
/// <summary> true if the user set the geometry
/// explicitly with a call to
/// SetComputeGeometry.</summary>
BOOL m_bGeometryExplicit;
/// <summary> true if the user set the thread block
/// sizes explicitly.
/// </summary>
BOOL m_bThreadBlockSizesExplicit;
/// <summary> Size of the thread block </summary>
PTASKDIM3 m_pThreadBlockSize;
/// <summary> Size of the dispatch grid </summary>
PTASKDIM3 m_pGridSize;
// Fixed-size arrays of MAXARGS entries each.
// NOTE(review): the names mirror the DEPENDENTCONTEXT members and the LPFNDEPHOSTTASK
// parameters, so these are presumably the storage handed to the host entry point at
// dispatch -- confirm against the implementation. Nothing in this header shows a bounds
// check against MAXARGS.
void* m_ppArgs[MAXARGS];
Datablock* m_ppDatablocks[MAXARGS];
void* m_ppDeps[MAXARGS];
BOOL m_pbIsDependentBinding[MAXARGS];
void* m_pvDeviceBindings[MAXARGS];
void* m_ppStreams[MAXARGS];
};
};
#endif

Просмотреть файл

@ -1,171 +0,0 @@
/********************************************************
* hrperft.h
**********************************************************/
#ifndef _HRPERFT_H_
#define _HRPERFT_H_
// performance timers are architecture and platform
// specific. Need to define a routine to access
// the perf counters on whatever processor is in use here:
#include "windows.h"
/// Counter value type.
typedef double ctrtype;
/// Yields the QuadPart member of x (e.g. of a LARGE_INTEGER). The argument and the
/// whole expansion are parenthesized so that expression arguments such as
/// hpfresult(*p) expand to ((*p).QuadPart) rather than *p.QuadPart, and so that the
/// result composes safely inside larger expressions. The expansion is still an lvalue.
#define hpfresult(x) ((x).QuadPart)
/// Thin wrapper over QueryPerformanceCounter.
#define query_hpc(x) QueryPerformanceCounter(x)
/// Thin wrapper over QueryPerformanceFrequency.
#define query_freq(x) QueryPerformanceFrequency(x)
/// Function pointer type for a __stdcall routine that takes a PLARGE_INTEGER and returns a long.
/// NOTE(review): by its name this presumably matches the NtQuerySystemTime entry point that
/// CHighResolutionTimer resolves at runtime -- confirm against the implementation.
typedef long (__stdcall *LPFNtQuerySystemTime)(PLARGE_INTEGER SystemTime);
/// Granularity selector passed to the CHighResolutionTimer constructor.
typedef enum gran_t {
gran_nanosec, // nanoseconds
gran_usec, // microseconds
gran_msec, // milliseconds
gran_sec // seconds
} hpf_granularity;
///-------------------------------------------------------------------------------------------------
/// <summary> High resolution timer.
/// For collecting performance measurements.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class CHighResolutionTimer {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="gran"> The granularity of the timer (one of the
/// hpf_granularity values). </param>
///-------------------------------------------------------------------------------------------------
CHighResolutionTimer(hpf_granularity gran);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
~CHighResolutionTimer(void);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the tick frequency of the underlying
/// counter primitive.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> The tick frequency. </returns>
///-------------------------------------------------------------------------------------------------
double tickfreq();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the tick count. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> The tick count. </returns>
///-------------------------------------------------------------------------------------------------
__int64 tickcnt();
///-------------------------------------------------------------------------------------------------
/// <summary> Resets this timer. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
void reset();
///-------------------------------------------------------------------------------------------------
/// <summary> Return the time elapsed since the
/// last reset. Optionally, reset the timer
/// as a side-effect of the query. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="reset"> true to reset. </param>
///
/// <returns> The elapsed time.
/// NOTE(review): presumably expressed in the granularity given
/// to the constructor -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
double elapsed(bool reset);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries the system time. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="li"> Pointer to the LARGE_INTEGER for the system time.
/// NOTE(review): presumably an output parameter -- confirm. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL query_system_time(PLARGE_INTEGER li);
protected:
/// <summary> The granularity of the timer
/// (one of the hpf_granularity values).
/// </summary>
hpf_granularity m_gran;
/// <summary> the value of the underlying
/// timing primitive at the time the
/// timer was last reset.</summary>
__int64 m_start;
/// <summary> The frequency of the underlying
/// timing primitive </summary>
double m_freq;
/// <summary> Module handle for the Windows DLL
/// used for querying system time.
/// </summary>
HMODULE m_hModule;
/// <summary> Function pointer for querying
/// system time
/// </summary>
LPFNtQuerySystemTime m_lpfnQuerySystemTime;
///-------------------------------------------------------------------------------------------------
/// <summary> Free resources allocated to support
/// query of system time. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
void free_query_system_time();
///-------------------------------------------------------------------------------------------------
/// <summary> Initialises the query system time. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <returns> The function pointer for querying system time.
/// NOTE(review): presumably null on failure -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
LPFNtQuerySystemTime init_query_system_time();
///-------------------------------------------------------------------------------------------------
/// <summary> Return the difference in milliseconds. </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///
/// <param name="lEarly"> The earlier time value. </param>
/// <param name="lLate"> The later time value. </param>
///
/// <returns> The difference between the two values, in milliseconds. </returns>
///-------------------------------------------------------------------------------------------------
DWORD delta_milliseconds(LARGE_INTEGER lEarly, LARGE_INTEGER lLate);};
#endif

Просмотреть файл

@ -1,656 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: instrumenter.h
//
// summary: Declares the instrumenter class
///-------------------------------------------------------------------------------------------------
#ifndef __PTASK_INSTRUMENTATION_H__
#define __PTASK_INSTRUMENTATION_H__
#include "primitive_types.h"
#include "Lockable.h"
#include <stack>
#include <map>
#include <string>
#include <vector>
#include <iostream>
#include <tuple>
class CSharedPerformanceTimer;
namespace PTask {
class Instrumenter : public Lockable
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the ad hoc instrumentation framework. Creates a singleton
/// instrumenter object.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL Initialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Shuts down the ad hoc instrumentation framework and destroys the singleton
/// instrumenter object.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL Destroy();
///-------------------------------------------------------------------------------------------------
/// <summary> Reports all measured latencies and acknowledges any outstanding
/// (incomplete) measurements. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="ss"> [in,out] The output stream that receives the report. </param>
///-------------------------------------------------------------------------------------------------
static void Report(std::ostream& ss);
///-------------------------------------------------------------------------------------------------
/// <summary> Enables or disables the instrumentation framework. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="bEnable"> true to enable, false to disable. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL Enable(BOOL bEnable);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether the ad hoc instrumentation framework is enabled. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <returns> true if enabled, false if not. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL IsEnabled();
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether a measurement matching 'strEventName' is in flight. In flight
/// means that a start sentinel has been pushed onto the outstanding stack
/// and has not yet been matched by a corresponding completion. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> true if in flight, false if not. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL IsInFlight(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Collects the data point recorded for the named event. </summary>
///
/// <remarks> crossbac, 8/12/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> NOTE(review): the original documentation left the return value unspecified;
/// presumably the measured value for the named event -- confirm against the
/// implementation. </returns>
///-------------------------------------------------------------------------------------------------
static double CollectDataPoint(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Collects the data point recorded for the named event, additionally returning
/// sample statistics through the output parameters. </summary>
///
/// <remarks> crossbac, 8/12/2013. </remarks>
///
/// <param name="strEventName"> [in] Name of the event. </param>
/// <param name="nSamples"> [out] NOTE(review): presumably the number of samples
/// recorded for the event -- confirm. </param>
/// <param name="dMin"> [out] NOTE(review): presumably the minimum sample value
/// -- confirm. </param>
/// <param name="dMax"> [out] NOTE(review): presumably the maximum sample value
/// -- confirm. </param>
///
/// <returns> NOTE(review): the original documentation left the return value unspecified;
/// presumably the measured value for the named event -- confirm against the
/// implementation. </returns>
///-------------------------------------------------------------------------------------------------
static double
CollectDataPoint(
__in std::string& strEventName,
__out UINT &nSamples,
__out double &dMin,
__out double &dMax
);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether a measurement matching 'strEventName' is complete. Note that
/// because multiple measurements matching a given name can be tracked, it is
/// possible for an event name to be both "in flight" and complete at the same time.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> true if complete, false if not. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL IsComplete(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the nesting depth for the given event name. A nesting depth of 0 means
/// there are no measurements with the given name in flight. A depth greater than 1
/// means there is a nested measurement with the same name. This idiom is likely best
/// avoided in potentially concurrent code, since the instrumenter handles nesting
/// with a stack, which makes it difficult to disambiguate end sentinels if they are
/// not ordered explicitly by the program.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The nesting depth. </returns>
///-------------------------------------------------------------------------------------------------
static UINT GetNestingDepth(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the start of the named event. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordEventStart(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the completion of the named event. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordEventComplete(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the start of an event that should have only one start sentinel,
/// but for which concurrency implies non-determinism, so many threads may attempt
/// to record the same event start. The primary example of this scenario is the
/// start of data processing in PTask, which occurs as soon as the first block
/// is pushed by the user. It is simplest to record this by calling the instrumenter
/// on every exposed call to Channel::Push, with all calls after the first ignored.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordSingletonEventStart(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the completion of an event that should have only one start sentinel, but
/// for which concurrency implies non-determinism, so many threads may attempt to
/// record the same event. The primary example of this scenario is the start of data
/// processing in PTask, which occurs as soon as the first block is pushed by the
/// user. It is simplest to record this by calling the instrumenter on every exposed
/// call to Channel::Push, with all calls after the first ignored.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
/// <param name="bRequireOutstanding"> (Optional) true to require an outstanding entry. Some
/// stats (like first return-value materialization)
/// are very difficult to capture unambiguously, because
/// calls to record the event must be placed in common code
/// paths. NOTE(review): the original text said that passing
/// true lets the record call fail without protest, but the
/// parameter name and the recordMaterialize() macro (which
/// passes FALSE) suggest FALSE is the lenient setting --
/// confirm against the implementation. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordSingletonEventComplete(std::string& strEventName, BOOL bRequireOutstanding=TRUE);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the start of the named event (C-string overload). </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> Name of the event. The convenience macros in this header
/// pass string literals here. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordEventStart(char * strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the completion of the named event (C-string overload). </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> Name of the event. The convenience macros in this header
/// pass string literals here. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordEventComplete(char * strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Increments the externally measured latency for a cumulative event. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> Name of the event. The convenience macros in this header
/// pass string literals here. </param>
/// <param name="dIncrement"> Amount to increment by. </param>
///
/// <returns> NOTE(review): documented as "the new nesting depth for events matching this
/// name", which looks copied from RecordEventStart -- confirm what is actually
/// returned for a cumulative event. </returns>
///-------------------------------------------------------------------------------------------------
static UINT AccumulateEventLatency(char * strEventName, double dIncrement);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the start of a cumulative event. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> Name of the event. The convenience macros in this header
/// pass string literals here. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordCumulativeEventStart(char * strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the completion of a cumulative event. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> Name of the event. The convenience macros in this header
/// pass string literals here. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordCumulativeEventComplete(char * strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the start of an event that should have only one start sentinel,
/// but for which concurrency implies non-determinism, so many threads may attempt
/// to record the same event start (C-string overload). The primary example of this
/// scenario is the start of data processing in PTask, which occurs as soon as the
/// first block is pushed by the user. It is simplest to record this by calling the
/// instrumenter on every exposed call to Channel::Push, with all calls after the
/// first ignored.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> Name of the event. The convenience macros in this header
/// pass string literals here. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordSingletonEventStart(char * strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the completion of an event that should have only one start sentinel, but
/// for which concurrency implies non-determinism, so many threads may attempt to
/// record the same event (C-string overload). The primary example of this scenario is
/// the start of data processing in PTask, which occurs as soon as the first block is
/// pushed by the user. It is simplest to record this by calling the instrumenter on
/// every exposed call to Channel::Push, with all calls after the first ignored.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> Name of the event. The convenience macros in this header
/// pass string literals here. </param>
/// <param name="bRequireOutstanding"> (Optional) true to require an outstanding entry. Some
/// stats (like first return-value materialization)
/// are very difficult to capture unambiguously, because
/// calls to record the event must be placed in common code
/// paths. NOTE(review): the original text said that passing
/// true lets the record call fail without protest, but the
/// parameter name and the recordMaterialize() macro (which
/// passes FALSE) suggest FALSE is the lenient setting --
/// confirm against the implementation. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
static UINT RecordSingletonEventComplete(char * strEventName, BOOL bRequireOutstanding=TRUE);
///-------------------------------------------------------------------------------------------------
/// <summary> Resets the instrumenter. NOTE(review): presumably discards the measurements
/// recorded so far -- confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void Reset();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. Declared in the protected section, so it cannot be
/// invoked from outside the class hierarchy. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///-------------------------------------------------------------------------------------------------
Instrumenter();
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. Declared in the protected section, so instances cannot be
/// destroyed from outside the class hierarchy. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~Instrumenter();
///-------------------------------------------------------------------------------------------------
/// <summary> Collects the data point recorded for the named event (instance-level
/// counterpart of the static CollectDataPoint with the same parameters). </summary>
///
/// <remarks> crossbac, 8/12/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> NOTE(review): the original documentation left the return value unspecified;
/// presumably the measured value for the named event -- confirm against the
/// implementation. </returns>
///-------------------------------------------------------------------------------------------------
double __CollectDataPoint(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Collects the data point recorded for the named event, additionally returning
/// sample statistics through the output parameters (instance-level counterpart of
/// the static CollectDataPoint with the same parameters). </summary>
///
/// <remarks> crossbac, 8/12/2013. </remarks>
///
/// <param name="strEventName"> [in] Name of the event. </param>
/// <param name="nSamples"> [out] NOTE(review): presumably the number of samples
/// recorded for the event -- confirm. </param>
/// <param name="dMin"> [out] NOTE(review): presumably the minimum sample value
/// -- confirm. </param>
/// <param name="dMax"> [out] NOTE(review): presumably the maximum sample value
/// -- confirm. </param>
///
/// <returns> NOTE(review): the original documentation left the return value unspecified;
/// presumably the measured value for the named event -- confirm against the
/// implementation. </returns>
///-------------------------------------------------------------------------------------------------
double
__CollectDataPoint(
__in std::string& strEventName,
__out UINT &nSamples,
__out double &dMin,
__out double &dMax
);
///-------------------------------------------------------------------------------------------------
/// <summary> Reports all measured latencies and acknowledges any outstanding
/// (incomplete) measurements. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="ss"> [in,out] The output stream the report is written to. </param>
///-------------------------------------------------------------------------------------------------
void __Report(std::ostream& ss);
///-------------------------------------------------------------------------------------------------
/// <summary> Reports all measured latencies matching the given event name.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="ss"> [in,out] The output stream the report is written to. </param>
/// <param name="strEventName"> [in,out] Name of the event. </param>
///-------------------------------------------------------------------------------------------------
void __ReportComplete(std::ostream& ss, std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Reports any outstanding (incomplete)
/// measurements matching the given event name.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="ss"> [in,out] The output stream the report is written to. </param>
/// <param name="strEventName"> [in,out] Name of the event. </param>
///-------------------------------------------------------------------------------------------------
void __ReportOutstanding(std::ostream& ss, std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Enables or disables the instrumentation framework. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="bEnable"> true to enable, false to disable. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL __Enable(BOOL bEnable);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether the ad hoc instrumentation framework is enabled. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <returns> true if enabled, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL __IsEnabled();
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether a measurement matching 'strEventName' is in flight. In flight
/// means that a start sentinel has been pushed onto the outstanding stack
/// and has not yet been matched by a corresponding completion. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> true if in flight, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL __IsInFlight(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Queries whether a measurement matching 'strEventName' is complete. Note that
/// because multiple measurements matching a given name can be tracked, it is
/// possible for an event name to be both "in flight" and complete at the same time.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> true if complete, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL __IsComplete(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the nesting depth for the given event name. A nesting depth of 0 means
/// there are no measurements with the given name in flight. A depth greater than 1
/// means there is a nested measurement with the same name. This idiom is likely best
/// avoided in potentially concurrent code, since the instrumenter handles nesting
/// with a stack, which makes it difficult to disambiguate end sentinels if they are
/// not ordered explicitly by the program.
/// </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The nesting depth. </returns>
///-------------------------------------------------------------------------------------------------
UINT __GetNestingDepth(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the start of the named event. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
UINT __RecordEventStart(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the completion of the named event. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
UINT __RecordEventComplete(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Increments the externally measured latency for a cumulative event.
/// NOTE(review): the original summary here read "Record event start.", apparently
/// a copy-paste; this wording follows the static AccumulateEventLatency
/// documentation -- confirm. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
/// <param name="dIncrement"> Amount to increment by. </param>
///
/// <returns> NOTE(review): documented as "the new nesting depth for events matching this
/// name", which looks copied from the event-start documentation -- confirm what
/// is actually returned for a cumulative event. </returns>
///-------------------------------------------------------------------------------------------------
UINT __AccumulateEventLatency(std::string& strEventName, double dIncrement);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the start of a cumulative event. (The original summary read
/// "Record event start.", omitting that this is the cumulative variant.) </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
UINT __RecordCumulativeEventStart(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the completion of a cumulative event. (The original summary read
/// "Record event complete.", omitting that this is the cumulative variant.) </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
UINT __RecordCumulativeEventComplete(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the start of a singleton event, i.e. an event that should have only
/// one start sentinel even though many threads may attempt to record it. (The
/// original summary read "Record event start.", omitting that this is the
/// singleton variant.) </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
UINT __RecordSingletonEventStart(std::string& strEventName);
///-------------------------------------------------------------------------------------------------
/// <summary> Records the completion of a singleton event, i.e. an event that should have
/// only one start sentinel even though many threads may attempt to record it. (The
/// original summary read "Record event complete.", omitting that this is the
/// singleton variant.) </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///
/// <param name="strEventName"> [in,out] Name of the event. </param>
/// <param name="bRequireOutstanding"> true to require an outstanding entry for the event.
/// Unlike the static overloads, no default value is
/// declared here. </param>
///
/// <returns> The new nesting depth for events matching this name. </returns>
///-------------------------------------------------------------------------------------------------
UINT __RecordSingletonEventComplete(std::string& strEventName, BOOL bRequireOutstanding);
///-------------------------------------------------------------------------------------------------
/// <summary> Resets this instrumenter object. NOTE(review): presumably discards the
/// measurements recorded so far -- confirm against the implementation. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void __Reset();
///-------------------------------------------------------------------------------------------------
/// <summary> Finalizes singletons. NOTE(review): presumably refers to singleton *events*
/// (see m_vSingletonCompleted) rather than singleton objects -- confirm. </summary>
///
/// <remarks> Crossbac, 7/23/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void __FinalizeSingletons();
/// <summary> Per-event record for cumulative events, keyed by event name.
/// NOTE(review): the tuple layout is not documented; presumably a sample count
/// plus three latency statistics -- confirm against the implementation. </summary>
typedef std::map<std::string, std::tuple<UINT, double, double, double>> CumulativeEventMap;
/// <summary> Enabled flag (see Enable / IsEnabled). </summary>
BOOL m_bEnabled;
/// <summary> Timer object. NOTE(review): presumably the time source for all
/// measurements -- confirm. </summary>
CSharedPerformanceTimer * m_pRTTimer;
/// <summary> Per-event-name stack of outstanding entries; the public documentation
/// describes start sentinels being pushed onto "the outstanding stack". </summary>
std::map<std::string, std::stack<double>> m_vOutstanding;
/// <summary> Per-event-name list of values. NOTE(review): presumably the completed
/// measurements -- confirm. </summary>
std::map<std::string, std::vector<double>> m_vCompleted;
/// <summary> Per-event-name single value. NOTE(review): presumably the completed
/// measurement for each singleton event -- confirm. </summary>
std::map<std::string, double> m_vSingletonCompleted;
/// <summary> Cumulative event records, keyed by event name. </summary>
CumulativeEventMap m_vCumulativeEvents;
/// <summary> NOTE(review): named like a boolean flag but typed UINT; presumably tracks
/// whether Initialize has run -- confirm. </summary>
static UINT m_bInitialized;
/// <summary> Pointer to the singleton instrumenter object (see Initialize / Destroy). </summary>
static Instrumenter * g_pInstrumenter;
};
};
// Convenience macros wrapping the Instrumenter entry points. With ADHOC_STATS defined
// they forward to the static Instrumenter methods; otherwise every macro expands to
// nothing, so instrumented call sites compile away.
//
// Every macro defined in the ADHOC_STATS branch must also have a no-op definition in the
// #else branch, or call sites stop compiling in non-instrumented builds.
#ifdef ADHOC_STATS
#define recordGraphDestroyStart() Instrumenter::RecordEventStart("GraphDestroy")
#define recordGraphDestroyLatency() Instrumenter::RecordEventComplete("GraphDestroy")
#define recordTeardownStart() Instrumenter::RecordEventStart("Teardown")
#define recordTeardownLatency() Instrumenter::RecordEventComplete("Teardown")
#define recordFirstPush() Instrumenter::RecordSingletonEventStart("ProcessData")
#define recordMaterialize() Instrumenter::RecordSingletonEventComplete("ProcessData", FALSE)
#define record_dispatch_entry() {Instrumenter::RecordSingletonEventStart("DispatchPhase"); Instrumenter::RecordCumulativeEventStart("task-dispatch"); }
#define record_dispatch_exit() {Instrumenter::RecordSingletonEventComplete("DispatchPhase"); Instrumenter::RecordCumulativeEventComplete("task-dispatch"); }
#define record_psdispatch_entry() Instrumenter::RecordCumulativeEventStart("PSDispatch")
#define record_psdispatch_exit() Instrumenter::RecordCumulativeEventComplete("PSDispatch")
#define record_psdispatch_latency(d) Instrumenter::AccumulateEventLatency("PSDispatch", d)
// NOTE: these two expansions carry their own trailing semicolon and ignore their argument;
// left unchanged so that existing call sites keep compiling exactly as before.
#define record_stream_agg_entry(x) Instrumenter::RecordCumulativeEventStart("SADispatch");
#define record_stream_agg_exit(x) Instrumenter::RecordCumulativeEventComplete("SADispatch");
// The schedule / wait-acc / sort-q probes are currently disabled even in instrumented
// builds; the live versions are kept below for reference.
#define record_schedule_entry()
#define record_schedule_exit()
#define record_wait_acc_entry()
#define record_wait_acc_exit()
#define record_sort_q_entry()
#define record_sort_q_exit()
//#define record_schedule_entry() Instrumenter::RecordCumulativeEventStart("Schedule")
//#define record_schedule_exit() Instrumenter::RecordCumulativeEventComplete("Schedule")
//#define record_wait_acc_entry() Instrumenter::RecordCumulativeEventStart("block-acc")
//#define record_wait_acc_exit() Instrumenter::RecordCumulativeEventComplete("block-acc")
//#define record_sort_q_entry() Instrumenter::RecordCumulativeEventStart("sortq")
//#define record_sort_q_exit() Instrumenter::RecordCumulativeEventComplete("sortq")
#else
#define recordTeardownStart()
#define recordTeardownLatency()
#define recordGraphDestroyStart()
#define recordGraphDestroyLatency()
#define recordFirstPush()
#define recordMaterialize()
#define record_dispatch_entry()
#define record_dispatch_exit()
#define record_psdispatch_entry()
#define record_psdispatch_exit()
#define record_psdispatch_latency(d)
// Previously missing: without these, any use of the stream-aggregation probes failed to
// compile when ADHOC_STATS was not defined.
#define record_stream_agg_entry(x)
#define record_stream_agg_exit(x)
#define record_schedule_entry()
#define record_schedule_exit()
#define record_wait_acc_entry()
#define record_wait_acc_exit()
#define record_sort_q_entry()
#define record_sort_q_exit()
#endif
#endif

Просмотреть файл

@ -1,112 +0,0 @@
//--------------------------------------------------------------------------------------
// File: InternalChannel.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _INTERNAL_CHANNEL_H_
#define _INTERNAL_CHANNEL_H_
#include <stdio.h>
#include <crtdbg.h>
#include <Windows.h>
#include "datablock.h"
#include "channel.h"
#include <deque>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> InternalChannel. Channel subclass specialized for Task-Task communication. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class InternalChannel : public Channel {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pGraph"> [in] If non-null, the graph. </param>
/// <param name="pDatablockTemplate"> [in] If non-null, the datablock template. </param>
/// <param name="hRuntimeTerminateEvent"> [in] Handle of the runtime terminate event. </param>
/// <param name="hGraphTeardownEvt"> [in] Handle of the graph teardown event. </param>
/// <param name="hGraphStopEvent"> [in] Handle of the graph stop event. </param>
/// <param name="lpszChannelName"> [in] If non-null, name of the channel. </param>
/// <param name="bHasBlockPool"> [in] Whether the channel has a block pool
/// (NOTE(review): inferred from the parameter name;
/// the original text was garbled -- confirm). </param>
///-------------------------------------------------------------------------------------------------
InternalChannel(
__in Graph * pGraph,
__in DatablockTemplate * pDatablockTemplate,
__in HANDLE hRuntimeTerminateEvent,
__in HANDLE hGraphTeardownEvt,
__in HANDLE hGraphStopEvent,
__in char * lpszChannelName,
__in BOOL bHasBlockPool
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~InternalChannel();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
/// streamed. Generally speaking, this is a property of the primitive whose IO
/// resources are being exposed by this port; consequently this property must be set
/// explicitly by the programmer when graph structures that are stateful are
/// constructed. For example, in a sort primitive, the main input can be streamed
/// (broken into multiple blocks) only if there is a merge network downstream of the
/// node performing the sort. Code that feeds the main input port needs to know this
/// to decide whether to grow blocks until all data is present, or to push partial
/// input.
/// </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> true if the port can stream data, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CanStream();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this channel has downstream writers. An output channel is
/// considered a writer because we must conservatively assume consumed
/// blocks will be written.
/// </summary>
///
/// <remarks> crossbac, 8/15/2013. </remarks>
///
/// <returns> true if there are downstream writers, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasDownstreamWriters();
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
/// for this channel in a way that is consistent with a well-formed graph. Called by
/// CheckSemantics().
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="pos"> [in,out] Output string stream. </param>
/// <param name="pGraph"> [in,out] Non-null, the graph. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
PTask::Graph * pGraph);
};
};
#endif

Просмотреть файл

@ -1,435 +0,0 @@
//--------------------------------------------------------------------------------------
// File: multichannel.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _MULTI_CHANNEL_H_
#define _MULTI_CHANNEL_H_
#include <stdio.h>
#include <crtdbg.h>
#include <Windows.h>
#include "datablock.h"
#include "ReferenceCounted.h"
#include "channel.h"
#include "PTaskRuntime.h"
#include <map>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary> Bundled channel class. Any block pushed into this channel is pushed into
/// multiple bundled channels.
/// </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class MultiChannel : public Channel
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Constructor. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="pGraph"> [in] If non-null, the graph. </param>
/// <param name="pDatablockTemplate"> [in] If non-null, the datablock template. </param>
/// <param name="hRuntimeTerminateEvent"> [in] Handle of the runtime terminate event. </param>
/// <param name="hGraphTeardownEvt"> [in] Handle of the graph teardown event. </param>
/// <param name="hGraphStopEvent"> [in] Handle of the graph stop event. </param>
/// <param name="lpszChannelName"> [in] If non-null, name of the channel. </param>
/// <param name="bHasBlockPool"> [in] Whether the channel has a block pool
/// (NOTE(review): inferred from the parameter name;
/// the original text was garbled -- confirm). </param>
///-------------------------------------------------------------------------------------------------
MultiChannel(
__in Graph * pGraph,
__in DatablockTemplate * pDatablockTemplate,
__in HANDLE hRuntimeTerminateEvent,
__in HANDLE hGraphTeardownEvt,
__in HANDLE hGraphStopEvent,
__in char * lpszChannelName,
__in BOOL bHasBlockPool
);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~MultiChannel();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
/// streamed. Generally speaking, this is a property of the primitive whose IO
/// resources are being exposed by this port; consequently this property must be set
/// explicitly by the programmer when graph structures that are stateful are
/// constructured. For example, in a sort primitive, the main input can be streamed
/// (broken into multiple blocks) only if there is a merge network downstream of the
/// node performing the sort. Code that feeds the main input port needs to know this
/// to decide whether to grow blocks until all data is present, or two push partial
/// input.
/// </summary>
///
/// <remarks> Crossbac, 12/20/2011. </remarks>
///
/// <returns> true if the port can stream data, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CanStream();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if channel is ready. This has a different meaning depending on the channel
/// subtype in question, but in general means "is the channel ready to produce or
/// consume datablocks?".
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="type"> (optional) the type of the channel. </param>
///
/// <returns> true if ready, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL IsReady(CHANNELENDPOINTTYPE type=CE_DST);
///-------------------------------------------------------------------------------------------------
/// <summary> Pulls a datablock from the channel, potentially timing out after dwTimeout
/// milliseconds.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF for no
/// timeout. </param>
///
/// <returns> null if it fails, else. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Pull(DWORD dwTimeout=0xFFFFFFFF);
///-------------------------------------------------------------------------------------------------
/// <summary> Returns the first available datablock on the channel without removing it. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> null if it fails, else the currently available datablock object. </returns>
///-------------------------------------------------------------------------------------------------
virtual Datablock * Peek();
///-------------------------------------------------------------------------------------------------
/// <summary> Pushes a datablock into this channel, blocking until there is capacity
/// for an optional timeout in milliseconds. Default timeout is infinite.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF for no
/// timeout. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL Push(Datablock* pBlock, DWORD dwTimeout=0xFFFFFFFF);
///-------------------------------------------------------------------------------------------------
/// <summary>
/// Sets the capacity of the channel, which is the maximum number of datablocks it can queue
/// before subsequent calls to push will block.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="nCapacity"> The capacity. </param>
///-------------------------------------------------------------------------------------------------
virtual void SetCapacity(UINT nCapacity);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the capacity. </summary>
///
/// <remarks> Crossbac, 7/10/2013. </remarks>
///
/// <returns> The capacity. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT GetCapacity();
///-------------------------------------------------------------------------------------------------
/// <summary> Bind this channel to a port. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="pPort"> [in] non-null, the port to bind. </param>
/// <param name="type"> (optional) the type of the channel. </param>
///-------------------------------------------------------------------------------------------------
virtual void BindPort(Port * pPort, CHANNELENDPOINTTYPE type);
///-------------------------------------------------------------------------------------------------
/// <summary> Unbind a port from this channel. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="type"> (optional) the type of the channel. </param>
///
/// <returns> null if it fails, else. </returns>
///-------------------------------------------------------------------------------------------------
virtual Port * UnbindPort(CHANNELENDPOINTTYPE type);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the port to which this channel is bound. Lock not required because we assume
/// this is set at creation, rather than after the graph has entered the running
/// state.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <param name="type"> (optional) the type of the channel. </param>
///
/// <returns> null if it fails, else the bound port. </returns>
///-------------------------------------------------------------------------------------------------
virtual Port * GetBoundPort(CHANNELENDPOINTTYPE type);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the datablock template associated with this port. Lock not required because
/// we assume this is set at creation, rather than after the graph has entered the
/// running state.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> null if it fails, else the template. </returns>
///-------------------------------------------------------------------------------------------------
DatablockTemplate * GetTemplate();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the current queue depth. </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///
/// <returns> The queue depth. </returns>
///-------------------------------------------------------------------------------------------------
virtual size_t GetQueueDepth();
///-------------------------------------------------------------------------------------------------
/// <summary>
/// Drains this channels queue, releasing references to the blocks in the queue.
/// </summary>
///
/// <remarks> Crossbac, 12/19/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual void Drain();
///-------------------------------------------------------------------------------------------------
/// <summary> Coalesce channel. </summary>
///
/// <remarks> Crossbac, 1/20/2012. </remarks>
///
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
///-------------------------------------------------------------------------------------------------
void CoalesceChannel(Channel * pChannel);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the coalesced channel map. </summary>
///
/// <remarks> crossbac, 4/18/2012. </remarks>
///
/// <returns> null if it fails, else the coalesced channel map. </returns>
///-------------------------------------------------------------------------------------------------
std::map<UINT, Channel*>* GetCoalescedChannelMap();
///-------------------------------------------------------------------------------------------------
/// <summary> Populate a set of tasks that are bound to this channel as consumers. Because a
/// channel may be an output channel or a multi-channel, the range of cardinality of
/// this result is [0..n]. Return the number of such tasks. Note that we cache the
/// result of this call: computing it requires a transitive closure over paths that
/// can include multi-channels and in/out routing, which in turn means traversing the
/// graph recursively. Since the result of this traversal cannot change, and the
/// traversal requires locking parts of the graph, we prefer to avoid repeating work
/// to recompute the same result.
/// </summary>
///
/// <remarks> Crossbac, 10/2/2012. </remarks>
///
/// <param name="pvTasks"> [in,out] non-null, the tasks. </param>
///
/// <returns> The number of downstream consuming tasks. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
GetDownstreamTasks(
__inout std::set<Task*>* pvTasks
);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets memory spaces downstream of this channel that either *must* consume data
/// that flows through this channel, or *may* consume it. The list is non-trivial
/// because of different channel types and predication. For example, an output
/// channel has no downstream consumers, while a multi-channel can have any number.
/// Enumerating consumers is complicated by the following additional factors:
///
/// 1) The presence of channel predicates can ensure dynamically that a particular
/// bound task never actually consumes a block flowing through it.
///
/// 2) If the channel is bound to In/out ports, then we need to analyze paths of
/// length greater than 1. In fact, we need the transitive closure.
///
/// 3) A task's accelerator class may enable it to be bound to several different
/// accelerators, meaning the list of potential consumers can be greater than 1 even
/// if the channel binding structure is trivial.
///
/// Note that we cache the result of this call: computing it requires a transitive
/// closure over paths that can include multi-channels and in/out routing, which in
/// turn means traversing the graph recursively. Since the result of this traversal
/// cannot change, and the traversal requires locking parts of the graph, we prefer
/// to avoid repeating work to recompute the same result.
/// </summary>
///
/// <remarks> Crossbac, 10/2/2012. </remarks>
///
/// <param name="ppvMandatoryAccelerators"> [in,out] If non-null, the mandatory accelerators. </param>
/// <param name="ppvPotentialAccelerators"> [in,out] If non-null, the potential accelerators. </param>
///
/// <returns> The downstream memory spaces. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL
EnumerateDownstreamMemorySpaces(
__inout std::set<Accelerator*>* pvMandatoryAccelerators,
__inout std::set<Accelerator*>* pvPotentialAccelerators
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this channel has downstream writers. An output channel is
/// considered a writer because we must conservatively assume consumed
/// blocks will be written.
/// </summary>
///
/// <remarks> crossbac, 8/15/2013. </remarks>
///
/// <returns> true if downstream writers, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasDownstreamWriters();
///-------------------------------------------------------------------------------------------------
/// <summary> Find the maximal capacity downstream port/channel path starting at this channel.
/// </summary>
///
/// <remarks> crossbac, 1/3/2014. </remarks>
///
/// <param name="vTasksVisited"> [in,out] [in,out] If non-null, the tasks visited. </param>
/// <param name="vPath"> [in,out] list of channels along the maximal path. </param>
///
/// <returns> The found maximal downstream capacity. </returns>
///-------------------------------------------------------------------------------------------------
virtual UINT
FindMaximalDownstreamCapacity(
__inout std::set<Task*>& vTasksVisited,
__inout std::vector<Channel*>& vPath
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this channel has any non trivial predicates. </summary>
///
/// <remarks> crossbac, 7/3/2014. </remarks>
///
/// <returns> true if non trivial predicate, false if not. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL HasNonTrivialPredicate();
///-------------------------------------------------------------------------------------------------
/// <summary> Return the super-set of all "control signals of interest" for this graph object.
/// A control signal is "of interest" if the behavior of this object is is predicated
/// in some way by the presence or absence of a given signal. This function returns
/// the bit-wise OR of all such signals.
/// </summary>
///
/// <remarks> crossbac, 7/7/2014. </remarks>
///
/// <returns> The bitwise OR of all found control signals of interest. </returns>
///-------------------------------------------------------------------------------------------------
virtual CONTROLSIGNAL GetControlSignalsOfInterest();
///-------------------------------------------------------------------------------------------------
/// <summary> Query if this multi-channel has an exposed component channel. </summary>
///
/// <remarks> crossbac, 7/7/2014. </remarks>
///
/// <returns> true if exposed component channel, false if not. </returns>
///-------------------------------------------------------------------------------------------------
BOOL HasExposedComponentChannel();
///-------------------------------------------------------------------------------------------------
/// <summary> Channel.toString() </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="os"> [in,out] The operating system. </param>
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
///
/// <returns> The shifted result. </returns>
///-------------------------------------------------------------------------------------------------
friend std::ostream& operator<<(std::ostream &os, Channel * pChannel);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
/// for this chanell in a way that is consistent with a well-formed graph. Called by
/// CheckSemantics()
/// </summary>
///
/// <remarks> Crossbac, 12/27/2011. </remarks>
///
/// <param name="pos"> [in,out] output string stream. </param>
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
///
/// <returns> true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
PTask::Graph * pGraph);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the downstream readonly port count. </summary>
///
/// <remarks> Crossbac, 2/6/2012. </remarks>
///
/// <returns> The downstream readonly port count. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetDownstreamReadonlyPortCount();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets the downstream writer port count. </summary>
///
/// <remarks> Crossbac, 2/6/2012. </remarks>
///
/// <returns> The downstream writer port count. </returns>
///-------------------------------------------------------------------------------------------------
UINT GetDownstreamWriterPortCount();
/// <summary> The channel map. </summary>
std::map<UINT, Channel*> m_pChannelMap;
};
};
#endif

Просмотреть файл

@ -1,54 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: nvtxmacros.h
//
// summary: Declares the NVTX profiling macros (no-ops unless both NVPROFILE and CUDA_SUPPORT are defined)
///-------------------------------------------------------------------------------------------------
#ifndef __NVTX_MACROS_H__
#define __NVTX_MACROS_H__
#if defined(NVPROFILE) && defined(CUDA_SUPPORT)
#include "nvToolsExt.h"

// Profiling build: the macros below forward to the NVTX API, but only once
// initnvtx() has managed to load the NVTX DLL.
//
//   gbnvtxldok - TRUE once the NVTX library has been loaded successfully.
//   gbvntxinit - TRUE once initnvtx() has run (the spelling differs from
//                gbnvtxldok; it is kept because other translation units
//                reference the symbol by this name).
extern BOOL gbnvtxldok;
extern BOOL gbvntxinit;

// Defines the two globals above; expand exactly once, at namespace scope.
#define DECLARE_NVTX_GLOBALS() \
    BOOL gbnvtxldok = FALSE;   \
    BOOL gbvntxinit = FALSE;

#define INITNVTX() initnvtx()

// Each statement macro is wrapped in do { ... } while(0) so that it behaves as
// a single statement: a bare `if(...) call` would silently capture the `else`
// of an enclosing `if (cond) MACRO(x); else ...` at the call site.
#define MARKEVENT(x)      do { if(gbnvtxldok) nvtxMark(x); } while(0)
#define NAMETHREAD(x)     do { if(gbnvtxldok) nvtxNameOsThread(GetCurrentThreadId(),(x)); } while(0)
#define MARKRANGEENTER(x) do { if(gbnvtxldok) nvtxRangePush(x); } while(0)
#define MARKRANGEEXIT()   do { if(gbnvtxldok) nvtxRangePop(); } while(0)
#define MARKTASKENTER(x)  do { if(gbnvtxldok) nvtxRangePushA(x); } while(0)
#define MARKTASKEXIT()    do { if(gbnvtxldok) nvtxRangePop(); } while(0)

// Defines initnvtx(): on the first call, tries to load the NVTX DLL and records
// the outcome in gbnvtxldok. The load flag is set BEFORE the "initnvtx" marker
// is emitted; MARKEVENT is gated on that flag, so emitting the marker first
// (as this code previously did) made it a guaranteed no-op.
#define DECLARE_NVTX_INIT()                                           \
    void initnvtx() {                                                 \
        if(!gbvntxinit) {                                             \
            gbnvtxldok = FALSE;                                       \
            HANDLE hNVTXlib = LoadLibrary(L"nvToolsExt64_1.dll");     \
            if(hNVTXlib != NULL) {                                    \
                gbnvtxldok = TRUE;                                    \
                MARKEVENT(L"initnvtx");                               \
            }                                                         \
            gbvntxinit = TRUE;                                        \
        }                                                             \
    }
#else
// Non-profiling build: every macro expands to nothing.
#define DECLARE_NVTX_GLOBALS()
#define DECLARE_NVTX_INIT()
#define INITNVTX()
#define MARKEVENT(x)
#define NAMETHREAD(x)
#define MARKRANGEENTER(x)
#define MARKRANGEEXIT()
#define MARKTASKENTER(x)
#define MARKTASKEXIT()
#endif
#endif

Просмотреть файл

@ -1,12 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: oclhdr.h
//
// summary: Includes the OpenCL header when OPENCL_SUPPORT is defined
///-------------------------------------------------------------------------------------------------
#ifndef __OCLHDR_H__
#define __OCLHDR_H__
#ifdef OPENCL_SUPPORT
// Use a forward slash in the header name: a backslash inside a quoted header
// name has implementation-defined meaning and is not found by non-Windows
// toolchains, while '/' is accepted by Windows compilers as well.
#include "CL/cl.h"
#endif
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,33 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: ptaskapi.h
//
// summary: Includes of public headers for ptask. Do not include this within the
// ptask implementation!
///-------------------------------------------------------------------------------------------------
#ifndef __PTASK_PUBLIC_API_H__
#define __PTASK_PUBLIC_API_H__
#include "primitive_types.h"
#include "PTaskRuntime.h"
#include "ptaskutils.h"
#include "accelerator.h"
#include "graph.h"
#include "datablock.h"
#include "datablocktemplate.h"
#include "CompiledKernel.h"
#include "hrperft.h"
#include "task.h"
#include "graphInputChannel.h"
#include "graphOutputChannel.h"
#include "internalChannel.h"
#include "InitializerChannel.h"
#include "multichannel.h"
#include "InputPort.h"
#include "OutputPort.h"
#include "StickyPort.h"
#include "MetaPort.h"
#include "hrperft.h"
#include "shrperft.h"
#endif

Просмотреть файл

@ -1,34 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: ptasklynx.h
//
// summary: Declares the lynx conditional compilation macros
///-------------------------------------------------------------------------------------------------
#ifndef __PTASK_LYNX_H__
// The guard macro defined here must be spelled exactly like the one tested
// above; it previously had a third trailing underscore, so the guard never
// took effect and every re-inclusion re-processed the header.
#define __PTASK_LYNX_H__

// Task-code instrumentation hooks: forward to the object's
// InitializeInstrumentation()/FinalizeInstrumentation() methods when
// PTASK_LYNX_INSTRUMENTATION is defined, otherwise expand to nothing.
#ifdef PTASK_LYNX_INSTRUMENTATION
#include "lynx.h"
#define init_task_code_instrumentation(x) (x)->InitializeInstrumentation()
#define finalize_task_code_instrumentation(x) (x)->FinalizeInstrumentation()
#else
#define init_task_code_instrumentation(x)
#define finalize_task_code_instrumentation(x)
#endif

// Timing helpers, active only when REPORT_TIMING is defined.
// ptasklynx_start_timer() declares the locals `timer` and `start` in the
// invoking scope; ptasklynx_stop_timer() must be expanded in that same scope
// and additionally expects `error` and `m_lpszTaskName` to be visible there.
// It synchronizes the CUDA context, prints "<task name>\t<elapsed>" to
// std::cout and deletes the timer.
#ifdef REPORT_TIMING
#include "shrperft.h"
#define ptasklynx_start_timer() \
CSharedPerformanceTimer * timer = new CSharedPerformanceTimer(gran_msec, true); \
double start = timer->elapsed(false);
#define ptasklynx_stop_timer() \
error = cuCtxSynchronize(); \
PTASSERT(error == CUDA_SUCCESS); \
double end = timer->elapsed(false); \
double runtime = end - start; \
std::cout << m_lpszTaskName << "\t" << runtime << std::endl; \
delete timer;
#else
#define ptasklynx_start_timer()
#define ptasklynx_stop_timer()
#endif
#endif

Просмотреть файл

@ -1,197 +0,0 @@
//--------------------------------------------------------------------------------------
// File: ptaskutils.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _PTASK_UTILS_H_
#define _PTASK_UTILS_H_
#include <Windows.h>
#include "primitive_types.h"
namespace PTask {
/// <summary> Default group size (presumably the default thread-group size used when
///           dispatching work -- confirm against the callers).
/// </summary>
static const unsigned int DEFAULT_GROUP_SIZE = 256u;

///-------------------------------------------------------------------------------------------------
/// <summary>   Selects how the contents of a raw buffer are interpreted when blocks are
///             dumped for debugging via DUMP_INTERMEDIATE_BLOCKS. Enumerators take
///             consecutive values starting at zero.
/// </summary>
///
/// <remarks>   Crossbac, 1/11/2012. </remarks>
///-------------------------------------------------------------------------------------------------
enum dumptype_t {
    dt_raw,         // 0
    dt_float,       // 1
    dt_int,         // 2
    dt_double       // 3
};
typedef dumptype_t DEBUGDUMPTYPE;
///-------------------------------------------------------------------------------------------------
/// <summary>   Collection of static utility functions. All members are static; the
///             protected section holds the state behind nextuid() (a lock and a
///             counter) plus an initialization flag.
///
///             Member declarations are deliberately written without a
///             "ptaskutils::" prefix: qualifying a member's name inside its own
///             class body is ill-formed in standard C++ (accepted only as a
///             compiler extension by MSVC; rejected by GCC and Clang).
/// </summary>
///-------------------------------------------------------------------------------------------------
class ptaskutils
{
public:

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   derive the best group size for dispatch. </summary>
    ///
    /// <remarks>   Crossbac, 1/11/2012. </remarks>
    ///
    /// <param name="group_size">   Size of the group. </param>
    /// <param name="global_size">  The global size. </param>
    ///
    /// <returns>   The derived size (presumably global_size rounded up to a multiple of
    ///             group_size -- confirm against the implementation). </returns>
    ///-------------------------------------------------------------------------------------------------
    static size_t
    roundup(
        int group_size,
        int global_size
        );

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Return a unique integer identifier. </summary>
    ///
    /// <remarks>   Crossbac, 1/11/2012. </remarks>
    ///
    /// <returns>   The identifier. </returns>
    ///-------------------------------------------------------------------------------------------------
    static unsigned int
    nextuid(
        void
        );

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Select the accelerator class for the given file, assumed to contain shader/kernel
    ///             code.
    /// </summary>
    ///
    /// <remarks>   Crossbac, 1/11/2012. </remarks>
    ///
    /// <param name="szFile">   The file. </param>
    ///
    /// <returns>   The accelerator class selected for the file. </returns>
    ///-------------------------------------------------------------------------------------------------
    static ACCELERATOR_CLASS
    SelectAcceleratorClass(
        const char * szFile
        );

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Loads file into memory, given an open file handle. </summary>
    ///
    /// <remarks>   Crossbac, 1/28/2013. </remarks>
    ///
    /// <param name="hFile">    Handle of the file. </param>
    /// <param name="ppMemory"> [in,out] If non-null, the memory. </param>
    /// <param name="puiBytes"> [in,out] If non-null, the size in bytes. </param>
    ///
    /// <returns>   true if it succeeds, false if it fails. </returns>
    ///-------------------------------------------------------------------------------------------------
    static BOOL
    LoadFileIntoMemory(
        const HANDLE hFile,
        void ** ppMemory,
        UINT * puiBytes
        );

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Loads file into memory, given a narrow-character file name. </summary>
    ///
    /// <remarks>   Crossbac, 1/28/2013. </remarks>
    ///
    /// <param name="szFile">   The file. </param>
    /// <param name="ppMemory"> [in,out] If non-null, the memory. </param>
    /// <param name="puiBytes"> [in,out] If non-null, the size in bytes. </param>
    ///
    /// <returns>   true if it succeeds, false if it fails. </returns>
    ///-------------------------------------------------------------------------------------------------
    static BOOL
    LoadFileIntoMemory(
        const char * szFile,
        void ** ppMemory,
        UINT * puiBytes
        );

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Loads file into memory, given a wide-character file name. </summary>
    ///
    /// <remarks>   Crossbac, 1/28/2013. </remarks>
    ///
    /// <param name="pwszFile"> The file. </param>
    /// <param name="ppMemory"> [in,out] If non-null, the memory. </param>
    /// <param name="puiBytes"> [in,out] If non-null, the size in bytes. </param>
    ///
    /// <returns>   true if it succeeds, false if it fails. </returns>
    ///-------------------------------------------------------------------------------------------------
    static BOOL
    LoadFileIntoMemory(
        const WCHAR * pwszFile,
        void ** ppMemory,
        UINT * puiBytes
        );

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Returns the number of set signal codes in a control signal. </summary>
    ///
    /// <remarks>   Crossbac, 2/14/2013. </remarks>
    ///
    /// <param name="luiSignalWord">    The signal word. </param>
    ///
    /// <returns>   The number of set signal codes. </returns>
    ///-------------------------------------------------------------------------------------------------
    static UINT
    SignalCount(
        __in CONTROLSIGNAL luiSignalWord
        );

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   get the index of the first set signal if any. </summary>
    ///
    /// <remarks>   Crossbac, 2/14/2013. </remarks>
    ///
    /// <param name="luiSignalWord">    The signal word. </param>
    ///
    /// <returns>   Index of the first set signal (the value returned when no signal is
    ///             set is not visible in this header -- confirm against the
    ///             implementation). </returns>
    ///-------------------------------------------------------------------------------------------------
    static int
    GetFirstSignalIndex(
        __in CONTROLSIGNAL luiSignalWord
        );

protected:

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Initializes utils. </summary>
    ///
    /// <remarks>   Crossbac, 1/11/2012. </remarks>
    ///-------------------------------------------------------------------------------------------------
    static void initialize();

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Cleans up utils. </summary>
    ///
    /// <remarks>   Crossbac, 1/11/2012. </remarks>
    ///-------------------------------------------------------------------------------------------------
    static void cleanup();

    /// <summary>   Unique id lock </summary>
    static CRITICAL_SECTION m_csUIDLock;

    /// <summary>   The uid counter </summary>
    static unsigned int m_uiUIDCounter;

    /// <summary>   true if utils is initialized </summary>
    static BOOL m_bInitialized;
};
};
#endif

Просмотреть файл

@ -1,23 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: ptdxhdr.h
//
// summary: include DirectX headers required for given build environment
///-------------------------------------------------------------------------------------------------
#pragma once
#if (_MSC_VER > 1600)
// apparently d3dx11.h is obsolete in win8
#include <d3dcommon.h>
#include <d3d11.h>
#include <d3dcompiler.h>
#else
#include <d3dcommon.h>
#include <d3d11.h>
#ifdef DIRECTXCOMPILERSUPPORT
#include <d3dcompiler.h>
#include <d3dx11.h>
#endif
#endif

Просмотреть файл

@ -1,274 +0,0 @@
//--------------------------------------------------------------------------------------
// File: ptgc.h
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _PTGC_H_
#define _PTGC_H_
#include <deque>
#include "Lockable.h"
namespace PTask {
/// Forward declaration: this header only refers to Datablock through pointers
/// (e.g. in GarbageCollector::QueueForGC), so the full definition is not needed here.
class Datablock;
/// <summary> Default number of GC threads; used as the default value of the
/// nGCThreads argument of the GarbageCollector constructor.
/// </summary>
static const UINT DEFAULT_DATABLOCK_GC_THREADS = 1;
///-------------------------------------------------------------------------------------------------
/// <summary> Datablock garbage collector. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class GarbageCollector : public Lockable
{
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="nGCThreads"> (optional) the gc threads. </param>
///-------------------------------------------------------------------------------------------------
GarbageCollector(UINT nGCThreads=DEFAULT_DATABLOCK_GC_THREADS);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~GarbageCollector();
///-------------------------------------------------------------------------------------------------
/// <summary> Force GC. </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void ForceGC();
///-------------------------------------------------------------------------------------------------
/// <summary> Force a GC sweep that is targeted at a particular memory space. Can be called under
/// low-mem conditions by a failing attempt to allocate device memory. Forcing a
/// full GC sweep from that calling context is impractical because a full sweep
/// requires locks we cannot acquire without breaking the lock-ordering discipline.
/// However a device-specific allocation context can be assumed to hold a lock on the
/// accelerator for which we are allocating, making it safe to sweep the GC queue
/// and free device buffers for that memspace *only* without deleting the parent blocks.
/// </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void ForceGC(UINT uiMemSpaceId);
///-------------------------------------------------------------------------------------------------
/// <summary> Queue a datablock for garbage collection. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
static void QueueForGC(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Destroys the GC. </summary>
///
/// <remarks> Crossbac, 3/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void DestroyGC();
///-------------------------------------------------------------------------------------------------
/// <summary> Creates the GC. </summary>
///
/// <remarks> Crossbac, 3/18/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void CreateGC();
///-------------------------------------------------------------------------------------------------
/// <summary> Reports the current state of the queue to the console in some detail.
/// If we are getting tight on memory, this can be a handy tool for checking
/// whether more aggressive GC would help the workload.
/// </summary>
///
/// <remarks> crossbac, 9/7/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void Report();
///-------------------------------------------------------------------------------------------------
/// <summary> Shuts down this object and frees any resources it is using. </summary>
///
/// <remarks> Crossbac, 3/1/2012. </remarks>
///-------------------------------------------------------------------------------------------------
void Shutdown();
#ifdef DEBUG
///-------------------------------------------------------------------------------------------------
/// <summary>   Static entry point for reporting a newly allocated datablock to the
///             collector. Declared only when DEBUG is defined; this is the function
///             the DEBUG flavour of the ptgc_new(x) macro expands to. </summary>
///
/// <remarks>   Crossbac, 7/1/2013. </remarks>
///
/// <param name="pNewBlock">    [in,out] If non-null, the new block. </param>
///-------------------------------------------------------------------------------------------------
static void NotifyAllocation(Datablock * pNewBlock);
///-------------------------------------------------------------------------------------------------
/// <summary>   Instance-level counterpart of the static NotifyAllocation. Declared
///             only when DEBUG is defined.
///             NOTE(review): presumably called by the static NotifyAllocation on the
///             global collector instance to update the DEBUG tracking sets -- confirm
///             against the implementation file. </summary>
///
/// <remarks>   Crossbac, 7/1/2013. </remarks>
///
/// <param name="pNewBlock">    [in,out] If non-null, the new block. </param>
///-------------------------------------------------------------------------------------------------
void __NotifyAllocation(Datablock * pNewBlock);
#endif
protected:
///-------------------------------------------------------------------------------------------------
/// <summary> Force GC. </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void _ForceGC();
///-------------------------------------------------------------------------------------------------
/// <summary> Force a GC sweep that is targeted at a particular memory space. Can be called under
/// low-mem conditions by a failing attempt to allocate device memory. Forcing a
/// full GC sweep from that calling context is impractical because a full sweep
/// requires locks we cannot acquire without breaking the lock-ordering discipline.
/// However a device-specific allocation context can be assumed to hold a lock on the
/// accelerator for which we are allocating, making it safe to sweep the GC queue
/// and free device buffers for that memspace *only* without deleting the parent blocks.
/// </summary>
///
/// <remarks> crossbac, 6/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void _ForceGC(UINT uiMemSpaceId);
///-------------------------------------------------------------------------------------------------
/// <summary> Queue a datablock for garbage collection. </summary>
///
/// <remarks> Crossbac, 12/28/2011. </remarks>
///
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
///-------------------------------------------------------------------------------------------------
void _QueueForGC(Datablock * pBlock);
///-------------------------------------------------------------------------------------------------
/// <summary> Reports the current state of the queue to the console in some detail.
/// If we are getting tight on memory, this can be a handy tool for checking
/// whether more aggressive GC would help the workload.
/// </summary>
///
/// <remarks> crossbac, 9/7/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void _Report();
///-------------------------------------------------------------------------------------------------
/// <summary>   Static entry point for a GC thread. The signature
///             (DWORD WINAPI f(LPVOID)) is the shape of a Win32 thread start routine. </summary>
///
/// <remarks>   Crossbac, 12/28/2011. </remarks>
///
/// <param name="p">    Opaque thread argument.
///                     NOTE(review): presumably the GarbageCollector instance whose
///                     GarbageCollectorThread() should be run -- confirm against the
///                     code that creates the GC threads. </param>
///
/// <returns>   The thread exit code. </returns>
///-------------------------------------------------------------------------------------------------
static DWORD WINAPI PTaskGCThread(LPVOID p);
///-------------------------------------------------------------------------------------------------
/// <summary>   The garbage collector thread proc (instance method). </summary>
///
/// <remarks>   Crossbac, 12/28/2011. </remarks>
///
/// <returns>   A DWORD status.
///             NOTE(review): presumably forwarded by PTaskGCThread as the thread exit
///             code -- confirm in the implementation. </returns>
///-------------------------------------------------------------------------------------------------
DWORD GarbageCollectorThread();
/// <summary> The queue of blocks waiting to be deleted. </summary>
std::deque<Datablock*> m_vQ;
/// <summary> Handle of the work-available event. Set when the queue is non-empty. </summary>
HANDLE m_hWorkAvailable;
/// <summary> Handle of the quiescent event. Set when a sweep is not in progress. </summary>
HANDLE m_hQuiescent;
/// <summary> Array of GC thread handles (see m_nGCThreads for the count). </summary>
HANDLE * m_vGCThreads;
/// <summary> The number of GC threads. </summary>
UINT m_nGCThreads;
/// <summary> Handle of the GC global shutdown event. </summary>
HANDLE m_hGCShutdown;
/// <summary> Handle of the global (runtime-wide) shutdown event. </summary>
HANDLE m_hRuntimeTerminateEvent;
/// <summary> TRUE if the GC thread is alive. </summary>
BOOL m_bAlive;
/// <summary> TRUE while a shutdown is in progress. </summary>
BOOL m_bShutdownInProgress;
/// <summary> TRUE once shutdown has completed. </summary>
BOOL m_bShutdownComplete;
/// <summary> TRUE if the collector is quiescent.
///           NOTE(review): presumably mirrors the state of m_hQuiescent -- confirm. </summary>
BOOL m_bQuiescent;
#ifdef DEBUG
/// <summary> Debug mode--keep a list of
///           things that have already been queued or
///           deleted to ensure we don't double free. </summary>
std::set<Datablock*> m_vQueued;
std::set<Datablock*> m_vDeleted;
/// <summary> Critical section taken by ptgc_lock()/ptgc_unlock() around the
///           tracking sets above. </summary>
CRITICAL_SECTION m_csGCTracker;
// DEBUG flavour of the tracking macros. They name the members above without
// qualification, so they only expand correctly inside GarbageCollector member
// functions. Note that the four critical-section macros already end in ';'.
#define ptgc_init() InitializeCriticalSection(&m_csGCTracker);
#define ptgc_deinit() DeleteCriticalSection(&m_csGCTracker);
#define ptgc_lock() EnterCriticalSection(&m_csGCTracker);
#define ptgc_unlock() LeaveCriticalSection(&m_csGCTracker);
// assert that x has not been queued / deleted before
#define ptgc_check_double_q(x) assert(m_vQueued.find(x)==m_vQueued.end())
#define ptgc_check_double_free(x) assert(m_vDeleted.find(x)==m_vDeleted.end())
// remember that x was queued
#define ptgc_record_q(x) m_vQueued.insert(x)
// move x from the "queued" set to the "deleted" set
#define ptgc_record_free(x) { m_vDeleted.insert(x); m_vQueued.erase(x); }
// forget all tracking state (takes the tracker lock itself)
#define ptgc_reset() { ptgc_lock(); m_vQueued.clear(); m_vDeleted.clear(); ptgc_unlock(); }
// report a new allocation through the static NotifyAllocation entry point
#define ptgc_new(x) { GarbageCollector::NotifyAllocation(x); }
#else
// Non-DEBUG flavour: every tracking macro expands to nothing.
#define ptgc_init()
#define ptgc_deinit()
#define ptgc_lock()
#define ptgc_unlock()
#define ptgc_check_double_q(x)
#define ptgc_check_double_free(x)
#define ptgc_record_q(x)
#define ptgc_record_free(x)
#define ptgc_reset()
#define ptgc_new(x)
#endif
static CRITICAL_SECTION m_csGlobalGCPtr;
static GarbageCollector * g_pGarbageCollector;
};
};
#endif

Просмотреть файл

@ -1,132 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: ptlock.h
//
// summary: Declares the ptlock class
///-------------------------------------------------------------------------------------------------
#ifndef __PTLOCK_H__
#define __PTLOCK_H__
#include <stdio.h>
#include <crtdbg.h>
#include "Lockable.h"
#include <assert.h>
namespace PTask {
///-------------------------------------------------------------------------------------------------
/// <summary>   Reader/writer lock layered on top of Lockable.
///             Readers hold the underlying Lockable lock only long enough to adjust
///             the reader count; a writer keeps the underlying lock held from
///             LockRW() until the matching UnlockRW(). Waiting is done by polling
///             with Sleep(1). The int returned by Lock() is treated as the calling
///             thread's nesting depth on the underlying lock. </summary>
///-------------------------------------------------------------------------------------------------
class PTLock : public Lockable {
public:

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Constructor. Starts with no readers and no writers. </summary>
    ///
    /// <remarks>   Crossbac, 12/27/2011. </remarks>
    ///
    /// <param name="lpszProtectedObjectName">  [in] If non-null, name of the protected object
    ///                                         (forwarded to Lockable). </param>
    ///-------------------------------------------------------------------------------------------------
    PTLock(char * lpszProtectedObjectName) :
        Lockable(lpszProtectedObjectName),
        m_nReaders(0),
        m_nWriters(0) { }

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Acquire the lock for reading. Polls (Sleep(1)) while a writer is
    ///             registered, then registers the caller as a reader. </summary>
    ///
    /// <remarks>   crossbac, 8/20/2013. </remarks>
    ///
    /// <returns>   If the caller already held the underlying lock (depth > 1), that
    ///             depth; otherwise the reader count as observed while the underlying
    ///             lock was still held. </returns>
    ///-------------------------------------------------------------------------------------------------
    int LockRO() {
        int nDepth = Lock();
        if(nDepth > 1) {
            // nested acquisition: nothing to register, just undo the Lock() above.
            assert(m_nReaders > 0);
            assert(m_nWriters == 0);
            Unlock();
            return nDepth;
        }
        while(m_nWriters > 0) {
            // drop the underlying lock while waiting so the writer can make progress.
            Unlock();
            Sleep(1);
            Lock();
        }
        assert(m_nWriters == 0);
        // snapshot the count before releasing the lock: once Unlock() returns,
        // other threads may change m_nReaders and reading it would be a race.
        const int nReaders = ++m_nReaders;
        Unlock();
        return nReaders;
    }

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Release a read lock taken with LockRO(). </summary>
    ///
    /// <remarks>   crossbac, 8/20/2013. </remarks>
    ///
    /// <returns>   The reader count as observed while the underlying lock was still held. </returns>
    ///-------------------------------------------------------------------------------------------------
    int UnlockRO() {
        int nDepth = Lock();
        assert(m_nReaders > 0);
        assert(m_nWriters == 0);
        if(nDepth == 1 && m_nReaders) {
            m_nReaders--;
        }
        // snapshot under the lock (see LockRO).
        const int nReaders = m_nReaders;
        Unlock();
        return nReaders;
    }

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Acquire the lock for writing. Polls (Sleep(1)) until no readers are
    ///             registered. On return the underlying Lockable lock is still held;
    ///             it is released by UnlockRW(). </summary>
    ///
    /// <remarks>   crossbac, 8/20/2013. </remarks>
    ///
    /// <returns>   The nesting depth for a nested acquisition (depth > 1), otherwise
    ///             the writer count (1). </returns>
    ///-------------------------------------------------------------------------------------------------
    int LockRW() {
        int nDepth = Lock();
        if(nDepth > 1) {
            // nested write acquisition: keep the extra hold on the underlying lock.
            assert(m_nReaders == 0);
            assert(m_nWriters == 1);
            return nDepth;
        }
        while(m_nReaders > 0) {
            // drop the underlying lock while waiting so readers can unregister.
            Unlock();
            Sleep(1);
            Lock();
        }
        assert(m_nReaders == 0);
        assert(m_nWriters == 0);
        m_nWriters++;
        return m_nWriters;
    }

    ///-------------------------------------------------------------------------------------------------
    /// <summary>   Release a write lock taken with LockRW(). The writer count is only
    ///             dropped when the outermost acquisition is released. </summary>
    ///
    /// <remarks>   crossbac, 8/20/2013. </remarks>
    ///
    /// <returns>   Whatever Lockable::Unlock() returns. </returns>
    ///-------------------------------------------------------------------------------------------------
    int UnlockRW() {
        // the caller is the writer and therefore still holds the underlying lock here.
        assert(m_nWriters == 1);
        assert(m_nReaders == 0);
        if(GetLockDepth() > 1)
            return Unlock();
        m_nWriters--;
        return Unlock();
    }

protected:

    /// <summary>   Number of currently registered readers. </summary>
    int m_nReaders;

    /// <summary>   Number of currently registered writers (0 or 1). </summary>
    int m_nWriters;
};
};
#endif

Просмотреть файл

@ -1,65 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: ptprofsupport.h
//
// summary: macros for dealing with conditionally compiled runtime monitoring modes.
///-------------------------------------------------------------------------------------------------
#ifndef __PTASK_PROFSUPPORT_H__
#define __PTASK_PROFSUPPORT_H__
#ifdef PROFILE_REFCOUNT_OBJECTS
#include <sstream>
#endif
#ifdef PROFILE_PBUFFERS
#include <sstream>
#include "PBuffer.h"
#endif
namespace PTask {
namespace Runtime {
// Per-mode "profiling supported" flags, defined elsewhere.
// NOTE(review): presumably each flag is non-zero when the corresponding
// conditionally compiled monitoring mode is present in this build, and is what
// callers pass as the bSupport argument of SET_PROFILER_MODE -- confirm against
// the definitions and call sites.
extern int g_bTPProfilingSupported;
extern int g_bRCProfilingSupported;
extern int g_bDBProfilingSupported;
extern int g_bCTProfilingSupported;
extern int g_bPBufferProfilingSupported;
extern int g_bInvocationCountingSupported;
extern int g_bBlockPoolProfilingSupported;
extern int g_bChannelProfilingSupported;
extern int g_bAdhocInstrumentationSupported;
extern int g_bSignalProfilingSupported;
};
};
#ifndef DEBUG
// warn PTask users if a release build supports a profiling mode
// that likely impacts performance (they all pretty much do).
//   bSupport:  non-zero if the mode is compiled into this build.
//   bReqState: the state the caller is requesting for the mode.
// Every argument is parenthesized so that compound expressions such as
// (a || b) keep their meaning after expansion. Arguments may be evaluated
// more than once, so do not pass expressions with side effects.
#define WARN_PROFILE_SUPPORT(bSupport, bReqState)                                            \
    if((bSupport) && (bReqState)) {                                                          \
        MandatoryInform("XXXX: PERFORMANCE: Using %s(%d) support in release build!\n",      \
                        __FUNCTION__,                                                        \
                        (bReqState));                                                        \
    }
#else
// debug builds: no warning is emitted.
#define WARN_PROFILE_SUPPORT(bSupport, bReqState)
#endif
// Set bTarget to the requested profiler state, honouring build support:
//   - mode not supported: bTarget is forced to FALSE; if the caller asked to
//     enable it, a message naming the calling function is emitted.
//   - mode supported: bTarget takes bReqState (with the release-build warning
//     from WARN_PROFILE_SUPPORT when applicable).
// The expansion is a brace-enclosed block, exactly as before, so existing call
// sites with or without a trailing ';' keep compiling. Arguments may be
// evaluated more than once.
#define SET_PROFILER_MODE(bSupport, bReqState, bTarget) {                                    \
    if(!(bSupport)) {                                                                        \
        if((bReqState)) {                                                                    \
            MandatoryInform("%s(%d) called, not supported in build!\n",                     \
                            __FUNCTION__,                                                    \
                            (bReqState));                                                    \
        }                                                                                    \
        (bTarget) = FALSE;                                                                   \
    } else {                                                                                 \
        WARN_PROFILE_SUPPORT(bSupport, bReqState);                                           \
        (bTarget) = (bReqState);                                                             \
    } }
#endif

Просмотреть файл

@ -1,16 +0,0 @@
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by ptask.rc
//
#define IDI_MAIN_ICON 101
// Next default values for new objects
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 113
#define _APS_NEXT_COMMAND_VALUE 40029
#define _APS_NEXT_CONTROL_VALUE 1000
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif

Просмотреть файл

@ -1,170 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: shrperft.h
//
// summary: Declares a thread-safe high resolution timer utility
///-------------------------------------------------------------------------------------------------
#ifndef _SHRPERFT_H_
#define _SHRPERFT_H_
#include "hrperft.h"
// performance timers are architecture and platform
// specific. The CHighResolutionTimer class defined in
// hrperft.h is lightweight but not thread-safe.
// This version is thread-safe, but will have higher
// overheads due to synchronization...Use this only for
// cases where measurements require a global time line
// across multiple threads.
///-------------------------------------------------------------------------------------------------
/// <summary> High resolution timer.
/// For collecting performance measurements.
/// </summary>
///
/// <remarks> Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
class CSharedPerformanceTimer {
public:
///-------------------------------------------------------------------------------------------------
/// <summary>   Constructor. </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///
/// <param name="gran">     The granularity of the timer
///                         (seconds, milliseconds, micro-seconds). </param>
/// <param name="bStart">   NOTE(review): undocumented in the original; presumably true
///                         to start the timer as part of construction -- confirm
///                         against the implementation. </param>
///-------------------------------------------------------------------------------------------------
CSharedPerformanceTimer(hpf_granularity gran, bool bStart);
///-------------------------------------------------------------------------------------------------
/// <summary>   Destructor. </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
~CSharedPerformanceTimer(void);
///-------------------------------------------------------------------------------------------------
/// <summary>   Resets this timer.
///             NOTE(review): the documentation of elapsed() states that objects of
///             this class should never be reset; confirm who is expected to call
///             this. </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
void reset();
///-------------------------------------------------------------------------------------------------
/// <summary>   Return the time elapsed since the
///             last reset. For compatibility with hrperft, the reset parameter is
///             present, but will assert. Objects of this class should never be reset.
///             </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///
/// <param name="reset">    true to reset. (ignored, will assert if true in debug mode)</param>
///
/// <returns>   The elapsed time since the timer started. NOTE(review): presumably
///             expressed in the units selected by the granularity -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
double elapsed(bool reset=false);
protected:
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the tick count. </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///
/// <returns>   The current tick count of the underlying timing primitive. </returns>
///-------------------------------------------------------------------------------------------------
__int64 tickcnt();
///-------------------------------------------------------------------------------------------------
/// <summary>   Queries the system time. </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///
/// <param name="li">   Pointer to the LARGE_INTEGER that receives the system time. </param>
///
/// <returns>   true if it succeeds, false if it fails. </returns>
///-------------------------------------------------------------------------------------------------
BOOL query_system_time(PLARGE_INTEGER li);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the tick frequency of the underlying
///             counter primitive.
///             </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///
/// <returns>   The tick frequency. </returns>
///-------------------------------------------------------------------------------------------------
double tickfreq();
/// <summary>   Lock that makes this timer usable from multiple threads. </summary>
CRITICAL_SECTION m_cs;
/// <summary>   The granularity of the timer (an hpf_granularity value; the
///             constructor documentation lists seconds, milliseconds and
///             micro-seconds).
///             </summary>
hpf_granularity m_gran;
/// <summary>   the value of the underlying
///             timing primitive at the time the
///             timer was last reset.</summary>
__int64 m_start;
/// <summary>   The frequency of the underlying
///             timing primitive </summary>
double m_freq;
/// <summary>   Module for windows DLL for querying
///             system time getting perf counter
///             frequency.
///             </summary>
HMODULE m_hModule;
/// <summary>   Function pointer for querying
///             system time
///             </summary>
LPFNtQuerySystemTime m_lpfnQuerySystemTime;
///-------------------------------------------------------------------------------------------------
/// <summary>   Free resources allocated to support
///             query of system time. </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///-------------------------------------------------------------------------------------------------
void free_query_system_time();
///-------------------------------------------------------------------------------------------------
/// <summary>   Initialises the query system time. </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///
/// <returns>   The function pointer to use for querying the system time.
///             NOTE(review): presumably null on failure -- confirm. </returns>
///-------------------------------------------------------------------------------------------------
LPFNtQuerySystemTime init_query_system_time();
///-------------------------------------------------------------------------------------------------
/// <summary>   Return the difference in milliseconds. </summary>
///
/// <remarks>   Crossbac, 12/23/2011. </remarks>
///
/// <param name="lEarly">   The earlier time. </param>
/// <param name="lLate">    The later time. </param>
///
/// <returns>   The difference between the two times, in milliseconds. </returns>
///-------------------------------------------------------------------------------------------------
DWORD delta_milliseconds(LARGE_INTEGER lEarly, LARGE_INTEGER lLate);
};
#endif

Просмотреть файл

@ -1,608 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: signalprofiler.h
//
// summary: Declares the signalprofiler class
///-------------------------------------------------------------------------------------------------
#ifndef __SIGNAL_PROFILER_H__
#define __SIGNAL_PROFILER_H__
#include "primitive_types.h"
#include "channel.h"
#include <vector>
#include <map>
#include <set>
#include <string>
#include <sstream>
#include "Lockable.h"
#include <assert.h>
#include "datablock.h"
#include "task.h"
#include "port.h"
class CHighResolutionTimer;
class CSharedPerformanceTimer;
namespace PTask {
class ReferenceCounted;
class Task;
class Port;
class Channel;
///-------------------------------------------------------------------------------------------------
/// <summary>   Kind of signal event recorded by the signal profiler.
///             NOTE(review): presumably INGRESS/EGRESS mean the signal entering /
///             leaving the witnessing graph object -- confirm against the call sites.
///             The enumerator values double as indices into
///             g_lpszSigEventTypeStrings, so the two must be kept in step. </summary>
///
/// <remarks>   crossbac, 6/30/2014. </remarks>
///-------------------------------------------------------------------------------------------------
typedef enum sigevttype {
    SIGEVT_UNSPECIFIED = 0,
    SIGEVT_INGRESS = 1,
    SIGEVT_EGRESS = 2
} SIGEVTTYPE;

/// <summary>   Printable names for SIGEVTTYPE, indexed by enumerator value. </summary>
static const char * g_lpszSigEventTypeStrings[] = {
    "SIGEVT_UNSPECIFIED",
    "SIGEVT_INGRESS",
    "SIGEVT_EGRESS"
};

// Map a SIGEVTTYPE to its printable name. The argument is parenthesized so the
// cast applies to the whole expression passed in. No range check is performed:
// the argument must be one of the SIGEVTTYPE enumerators.
#define SigEventTypeString(e) (g_lpszSigEventTypeStrings[(int)(e)])
// Kind of graph object that witnessed a signal. SignalObservation_t::GetWitnessType
// maps Port -> wtport, Task -> wttask, Channel -> wtchannel and anything else
// to wtunknown.
typedef enum witnesstype_t {
wtport,
wttask,
wtchannel,
wtunknown
} WITNESSTYPE;
// Signal-activity state of a channel.
// NOTE(review): presumably "exercised" means profiled signal traffic was
// observed on the channel -- confirm against the profiler implementation.
typedef enum channelsigactivitystate_t {
cas_none=0,
cas_unexercised=1,
cas_exercised=2
} CHANNELACTIVITYSTATE;
// Predication state of a channel.
// NOTE(review): presumably cps_na means the channel is not predicated, and
// open/closed describe the predicate's current state -- confirm.
typedef enum channelpredicationstate_t {
cps_na=0,
cps_open=1,
cps_closed=2
} CHANNELPREDICATIONSTATE;
typedef struct SignalObservation_t {
SIGEVTTYPE eType;
double dTimestamp;
Lockable * pWitness;
WITNESSTYPE wType;
CONTROLSIGNAL luiRawSignal;
Datablock * pBlock;
UINT uiDBUID;
BOOL bTookRef;
///-------------------------------------------------------------------------------------------------
/// <summary> Signal observation t. </summary>
///
/// <remarks> crossbac, 6/30/2014. </remarks>
///-------------------------------------------------------------------------------------------------
struct SignalObservation_t() :
eType(SIGEVT_UNSPECIFIED),
dTimestamp(0.0),
pWitness(NULL),
luiRawSignal(0),
pBlock(NULL),
uiDBUID(0),
bTookRef(FALSE),
wType(wtunknown) {}
///-------------------------------------------------------------------------------------------------
/// <summary> Signal observation t. </summary>
///
/// <remarks> crossbac, 6/30/2014. </remarks>
///-------------------------------------------------------------------------------------------------
void
SignalObservation_t::Initialize(
__in SIGEVTTYPE _eType,
__in double _dTimestamp,
__in Lockable * _pWitness,
__in WITNESSTYPE _wType,
__in CONTROLSIGNAL _luiSignal,
__in Datablock * _pBlock,
__in UINT _uiDBUID,
__in BOOL _bTakeRef
)
{
eType = _eType;
dTimestamp = _dTimestamp;
pWitness = _pWitness;
luiRawSignal = _luiSignal;
pBlock = _pBlock;
wType = _wType;
uiDBUID = _uiDBUID;
bTookRef = _bTakeRef;
if(_bTakeRef)
pBlock->AddRef();
}
///-------------------------------------------------------------------------------------------------
/// <summary> Gets witness type. </summary>
///
/// <remarks> crossbac, 6/30/2014. </remarks>
///
/// <param name="pWitness"> [in,out] If non-null, the witness. </param>
///
/// <returns> The witness type. </returns>
///-------------------------------------------------------------------------------------------------
static WITNESSTYPE
GetWitnessType(
Lockable* pWitness
) {
Channel * pChannel = dynamic_cast<Channel*>(pWitness);
Port * pPort = dynamic_cast<Port*>(pWitness);
Task * pTask = dynamic_cast<Task*>(pWitness);
int nVPointerCount = 0;
nVPointerCount += pChannel ? 1 : 0;
nVPointerCount += pPort ? 1 : 0;
nVPointerCount += pTask ? 1 : 0;
assert(nVPointerCount == 1);
return pChannel ? wtchannel : (pPort ? wtport : (pTask ? wttask : wtunknown));
}
///-------------------------------------------------------------------------------------------------
/// <summary> Gets witness type. </summary>
///
/// <remarks> crossbac, 6/30/2014. </remarks>
///
/// <param name="pWitness"> [in,out] If non-null, the witness. </param>
///
/// <returns> The witness type. </returns>
///-------------------------------------------------------------------------------------------------
static char *
GetWitnessName(
Lockable* pWitness
) {
Channel * pChannel = dynamic_cast<Channel*>(pWitness);
Port * pPort = dynamic_cast<Port*>(pWitness);
Task * pTask = dynamic_cast<Task*>(pWitness);
int nVPointerCount = 0;
nVPointerCount += pChannel ? 1 : 0;
nVPointerCount += pPort ? 1 : 0;
nVPointerCount += pTask ? 1 : 0;
assert(nVPointerCount == 1);
return pChannel ? pChannel->GetName() : (pPort ? pPort->GetVariableBinding() : (pTask ? pTask->GetTaskName() : "wtunknown"));
}
///-------------------------------------------------------------------------------------------------
/// <summary> Signal observation t. </summary>
///
/// <remarks> crossbac, 6/30/2014. </remarks>
///-------------------------------------------------------------------------------------------------
struct SignalObservation_t(
__in SIGEVTTYPE _eType,
__in double _dTimestamp,
__in Lockable * _pWitness,
__in Datablock * _pBlock,
__in BOOL _bTakeRef=FALSE
)
{
CONTROLSIGNAL _luiSignal = _pBlock ? _pBlock->__getControlSignals() : DBCTLC_NONE;
UINT _uiDBUID = _pBlock ? _pBlock->GetDBUID() : 0;
WITNESSTYPE _wType = GetWitnessType(_pWitness);
Initialize(_eType, _dTimestamp, _pWitness, _wType, _luiSignal, _pBlock, _uiDBUID, _bTakeRef);
}
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> crossbac, 6/30/2014. </remarks>
///-------------------------------------------------------------------------------------------------
~SignalObservation_t() {
if(bTookRef && pBlock)
pBlock->Release();
}
///-------------------------------------------------------------------------------------------------
/// <summary> Stream insertion operator. </summary>
///
/// <remarks> crossbac, 6/30/2014. </remarks>
///
/// <param name="os"> [in,out] The operating system. </param>
/// <param name="pObservation"> [in,out] If non-null, the observation. </param>
///
/// <returns> The shifted result. </returns>
///-------------------------------------------------------------------------------------------------
friend std::ostream& operator<<(
std::ostream &os,
SignalObservation_t* pObservation
)
{
os << pObservation->dTimestamp << ": "
<< ControlSignalString(pObservation->luiRawSignal) << " "
<< SigEventTypeString(pObservation->eType) << " DB#"
<< pObservation->uiDBUID << " "
<< GetWitnessName(pObservation->pWitness);
return os;
}
} SIGOBSERVATION;
class SignalProfiler {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Query if 'luiControlSignal' is under profile. </summary>
///
/// <remarks> crossbac, 6/27/2014. </remarks>
///
/// <param name="luiControlSignal"> The lui control signal. </param>
///
/// <returns> true if under profile, false if not. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
IsUnderProfile(
__in CONTROLSIGNAL luiControlSignal
);
///-------------------------------------------------------------------------------------------------
/// <summary> Query if control signals on this block are under profile. </summary>
///
/// <remarks> crossbac, 6/27/2014. </remarks>
///
/// <param name="luiControlSignal"> The lui control signal. </param>
///
/// <returns> true if under profile, false if not. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
IsUnderProfile(
__in Datablock * pBlock
);
///-------------------------------------------------------------------------------------------------
/// <summary> Registers the signal as being one "of interest" to the profiler. </summary>
///
/// <remarks> crossbac, 6/27/2014. </remarks>
///
/// <param name="luiControlSignal"> The lui control signal. </param>
/// <param name="bEnable"> (Optional) the enable. </param>
///-------------------------------------------------------------------------------------------------
static void
RegisterSignal(
__in CONTROLSIGNAL luiControlSignal,
__in BOOL bEnable=TRUE
);
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes control signal profiling. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void Initialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Deinitialize control signal profiling. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void Deinitialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Dumps profile statistics. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <param name="ss"> [in,out] The ss. </param>
///-------------------------------------------------------------------------------------------------
static void Report(std::ostream& ss);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets signal history for a particular graph object. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <returns> null if it fails, else the task dispatch history. </returns>
///-------------------------------------------------------------------------------------------------
static std::stringstream* GetHistory();
///-------------------------------------------------------------------------------------------------
/// <summary> Record signal transit. </summary>
///
/// <remarks> crossbac, 6/27/2014. </remarks>
///
/// <param name="pWitness"> [in,out] If non-null, the witness. </param>
/// <param name="pBlock"> [in,out] The lui control signal. </param>
/// <param name="eSigEventType"> Type of the signal event. </param>
///-------------------------------------------------------------------------------------------------
static void
RecordSignalTransit(
__in Lockable * pWitness,
__in Datablock * pBlock,
__in SIGEVTTYPE eSigEventType
);
///-------------------------------------------------------------------------------------------------
/// <summary> return true if the given graph object ever bore witness to
/// the given control signal. </summary>
///-------------------------------------------------------------------------------------------------
static BOOL
SignalTrafficOccurred(
__in Lockable * pWitness,
__in CONTROLSIGNAL luiControlSignal,
__in SIGEVTTYPE eType
);
///-------------------------------------------------------------------------------------------------
/// <summary> return true if the given graph object ever bore witness to
/// the given control signal. </summary>
///-------------------------------------------------------------------------------------------------
static BOOL
BalancedSignalTrafficOccurred(
__in Lockable * pWitness,
__in CONTROLSIGNAL luiControlSignal
);
///-------------------------------------------------------------------------------------------------
/// <summary> return true if the given graph object ever bore witness to
/// the given control signal. </summary>
///-------------------------------------------------------------------------------------------------
static BOOL
SuppressedSignalTrafficOccurred(
__in Lockable * pWitness,
__in CONTROLSIGNAL luiControlSignal
);
///-------------------------------------------------------------------------------------------------
/// <summary> return true if the given graph object ever bore witness to
/// the given control signal. </summary>
///-------------------------------------------------------------------------------------------------
static BOOL
ProfiledSignalTrafficOccurred(
__in Lockable * pWitness,
__in SIGEVTTYPE eType
);
///-------------------------------------------------------------------------------------------------
/// <summary> return true if the given graph object ever bore witness to
/// the given control signal. </summary>
///-------------------------------------------------------------------------------------------------
static BOOL
AnyProfiledSignalTrafficOccurred(
__in Lockable * pWitness
);
///-------------------------------------------------------------------------------------------------
/// <summary> return true if the given graph object ever bore witness to
/// the given control signal. </summary>
///-------------------------------------------------------------------------------------------------
static BOOL
SignalIngressOccurred(
__in Lockable * pWitness,
__in CONTROLSIGNAL luiControlSignal
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Query whether egress of the given control signal was recorded for the given
///             witness object. Counterpart of SignalIngressOccurred. </summary>
///
/// <param name="pWitness">         [in] The graph object to query. </param>
/// <param name="luiControlSignal"> [in] The control signal of interest. </param>
///
/// <returns>   Presumably TRUE if egress was observed, FALSE otherwise
///             -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
SignalEgressOccurred(
__in Lockable * pWitness,
__in CONTROLSIGNAL luiControlSignal
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Query whether ingress of any profiled signal was recorded for the given
///             witness object. Takes no control-signal argument, so the query is not
///             specific to one signal value. </summary>
///
/// <param name="pWitness"> [in] The graph object to query. </param>
///
/// <returns>   Presumably TRUE if ingress was observed, FALSE otherwise
///             -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
ProfiledSignalIngressOccurred(
__in Lockable * pWitness
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Query whether egress of any profiled signal was recorded for the given
///             witness object. Takes no control-signal argument, so the query is not
///             specific to one signal value. </summary>
///
/// <param name="pWitness"> [in] The graph object to query. </param>
///
/// <returns>   Presumably TRUE if egress was observed, FALSE otherwise
///             -- confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
ProfiledSignalEgressOccurred(
__in Lockable * pWitness
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Query whether the profiled signal traffic recorded for the given witness
///             object was balanced. This is the function behind the ctlpwasbalanced() macro,
///             which expands to FALSE when PROFILE_CONTROLSIGNALS is not defined.
///             NOTE(review): "balanced" presumably means ingress and egress observations
///             match -- confirm against the implementation. </summary>
///
/// <remarks>   crossbac, 6/27/2014. </remarks>
///
/// <param name="pWitness"> [in] The graph object to query. </param>
///
/// <returns>   A BOOL answering the query; this is a predicate, not a success/failure
///             status. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
BalancedProfiledSignalTrafficOccurred(
__in Lockable * pWitness
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Query whether profiled signal traffic was suppressed at the given witness
///             object. This is the function behind the ctlpwassuppresed() macro, which
///             expands to FALSE when PROFILE_CONTROLSIGNALS is not defined.
///             NOTE(review): what counts as "suppressed" is decided by the implementation,
///             which is not visible from this header -- confirm. </summary>
///
/// <remarks>   crossbac, 6/27/2014. </remarks>
///
/// <param name="pWitness"> [in] The graph object to query. </param>
///
/// <returns>   A BOOL answering the query; this is a predicate, not a success/failure
///             status. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
SuppressedProfiledSignalTrafficOccurred(
__in Lockable * pWitness
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Query whether the given graph object has a predicate that is relevant to
///             signal profiling. This is the function behind the ctlphasrelevantpredicate()
///             macro, which expands to FALSE when PROFILE_CONTROLSIGNALS is not defined.
///             NOTE(review): relevance is presumably decided by the protected
///             IsRelevantPredicate(CHANNELPREDICATE) helper declared in this class
///             -- confirm against the implementation. </summary>
///
/// <remarks>   crossbac, 6/27/2014. </remarks>
///
/// <param name="pWitness"> [in] The graph object to query. </param>
///
/// <returns>   true if relevant predicate, false if not. </returns>
///-------------------------------------------------------------------------------------------------
static BOOL
HasRelevantPredicate(
__in Lockable * pWitness
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the signal activity state recorded for the given graph object.
///             Callers normally reach this through the ctlpgetchactstate() macro, which
///             substitutes cas_none when PROFILE_CONTROLSIGNALS is not defined. </summary>
///
/// <remarks>   crossbac, 7/1/2014. </remarks>
///
/// <param name="pLockable">    [in] The graph object to query. </param>
///
/// <returns>   The signal activity state. </returns>
///-------------------------------------------------------------------------------------------------
static CHANNELACTIVITYSTATE
GetSignalActivityState(
__in Lockable * pLockable
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the signal predication state for the given graph object.
///             Callers normally reach this through the ctlpgetchpredstate() macro, which
///             substitutes cps_na when PROFILE_CONTROLSIGNALS is not defined. </summary>
///
/// <remarks>   crossbac, 7/1/2014. </remarks>
///
/// <param name="pLockable">    [in] The graph object to query (the name suggests a
///                             channel is expected -- confirm). </param>
///
/// <returns>   The channel signal predication state. </returns>
///-------------------------------------------------------------------------------------------------
static CHANNELPREDICATIONSTATE
GetChannelSignalPredicationState(
__in Lockable * pLockable
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets the color string that encodes a combination of activity state and
///             predication state. Callers normally reach this through the ctlpgetchcolor()
///             macro, which substitutes the literal "gray60" when PROFILE_CONTROLSIGNALS is
///             not defined.
///             NOTE(review): the result presumably comes from the static
///             s_lpszChannelColors[3][3] table -- confirm; if so, callers must not
///             modify or free the returned pointer. </summary>
///
/// <remarks>   crossbac, 7/1/2014. </remarks>
///
/// <param name="eActivityState">       State of the activity. </param>
/// <param name="ePredicationState">    State of the predication. </param>
///
/// <returns>   null if it fails, else the channel coded color. </returns>
///-------------------------------------------------------------------------------------------------
static char *
GetChannelCodedColor(
__in CHANNELACTIVITYSTATE eActivityState,
__in CHANNELPREDICATIONSTATE ePredicationState
);
///-------------------------------------------------------------------------------------------------
/// <summary>   Gets a display name for the given graph object that encodes its signal
///             profile. Callers normally reach this through the ctlpgetchname() macro,
///             which substitutes the literal "channel" when PROFILE_CONTROLSIGNALS is not
///             defined. </summary>
///
/// <remarks>   crossbac, 7/1/2014. </remarks>
///
/// <param name="pLockable">    [in] The graph object to name. </param>
/// <param name="bBlocked">     Flag supplied by the caller; presumably TRUE when the object
///                             is currently blocked -- confirm how it affects the
///                             generated name. </param>
///
/// <returns>   The channel coded name. </returns>
///-------------------------------------------------------------------------------------------------
static std::string
GetChannelCodedName(
__in Lockable * pLockable,
__in BOOL bBlocked
);
/// <summary> true if signal profiler is initialized. </summary>
static BOOL s_bSignalProfilerInit;
protected:
static BOOL IsRelevantPredicate(CHANNELPREDICATE ePredicate);
#ifdef PROFILE_CONTROLSIGNALS
/// <summary>   Signal observation indexes. Each map associates a key with the set of
///             SIGOBSERVATION records filed under it:
///               s_vWitnessToSignalMap - keyed by the witnessing graph object,
///               s_vSignalToWitnessMap - keyed by control signal value,
///               s_vSignalHistory      - keyed by a double (presumably the observation
///                                       timestamp -- TODO confirm).
///             The original note here points out that a raw signal may be the bitwise OR
///             of multiple individual signals; how composite values are filed in
///             s_vSignalToWitnessMap is decided by the implementation -- confirm.
/// </summary>
static std::map<Lockable*, std::set<SIGOBSERVATION*>> s_vWitnessToSignalMap;
static std::map<CONTROLSIGNAL, std::set<SIGOBSERVATION*>> s_vSignalToWitnessMap;
static std::map<double, std::set<SIGOBSERVATION*>> s_vSignalHistory;
/// <summary>   Filter flag and signal mask; presumably only signals in
///             s_luiSignalsOfInterest are recorded while filtering is on -- confirm. </summary>
static BOOL s_bFilterProfiledSignals;
static CONTROLSIGNAL s_luiSignalsOfInterest;
/// <summary>   Critical section for the profiler state; presumably what Lock()/Unlock()
///             below acquire and release -- confirm. </summary>
static CRITICAL_SECTION s_csSignalProfiler;
/// <summary>   Timer exposed through the ctlptimer() macro. </summary>
static CSharedPerformanceTimer * s_pGlobalProfileTimer;
/// <summary>   3x3 table of color strings; presumably indexed by activity state and
///             predication state (see GetChannelCodedColor) -- confirm. </summary>
static char * s_lpszChannelColors[3][3];
/// <summary>   Locking and enablement helpers for the static profiler state. </summary>
static void Lock();
static void Unlock();
static BOOL IsLocked();
static BOOL Enabled();
#pragma warning(disable:4127)
// Convenience macros for control-signal profiling. With PROFILE_CONTROLSIGNALS defined
// they forward to SignalProfiler; otherwise (see the #else branch) the recording macros
// compile away and the query macros collapse to constant defaults, so call sites need no
// #ifdefs of their own.
//
// ctlpon(): TRUE when profiling is switched on at runtime and the profiler has been
// initialized. The previous definition tested "s_bSignalProfile != NULL", an identifier
// that this class does not declare; the initialization flag the class does declare is the
// public BOOL s_bSignalProfilerInit, so that is what is tested here. It is class-qualified
// like the other macros so it also resolves outside SignalProfiler members.
#define ctlpon() (PTask::Runtime::GetControlSignalProfileMode()&&(SignalProfiler::s_bSignalProfilerInit))
// ctlptimer(): the profiler's timer. s_pGlobalProfileTimer is a protected, unqualified
// member, so this macro is only usable inside SignalProfiler member functions.
#define ctlptimer() (s_pGlobalProfileTimer)
// ctlpdeclegressctr() declares the local counter that ctlpopegress() consults so that
// only the first egress in a scope is recorded.
#define ctlpdeclegressctr() UINT uiEgressCounter = 0
#define ctlpingress(l,b) SignalProfiler::RecordSignalTransit((l), (b), SIGEVTTYPE::SIGEVT_INGRESS)
#define ctlpegress(l,b) SignalProfiler::RecordSignalTransit((l), (b), SIGEVTTYPE::SIGEVT_EGRESS)
#define ctlpopegress(l,b) { ctlpcondegress(uiEgressCounter == 0, l, b); uiEgressCounter++; }
// NOTE(review): the two conditional forms expand to a bare if-statement, so using them
// directly before an 'else' at the call site would capture that 'else'.
#define ctlpcondingress(c,l,b) if(c) { ctlpingress((l),(b)); }
#define ctlpcondegress(c,l,b) if(c) { ctlpegress((l),(b)); }
#define ctlpwasactive(x) SignalProfiler::AnyProfiledSignalTrafficOccurred(x)
#define ctlpwasbalanced(x) SignalProfiler::BalancedProfiledSignalTrafficOccurred((x))
#define ctlpwassuppresed(x) SignalProfiler::SuppressedProfiledSignalTrafficOccurred((x))
#define ctlphasrelevantpredicate(x) SignalProfiler::HasRelevantPredicate(x)
#define ctlpgetchactstate(x) SignalProfiler::GetSignalActivityState(x)
#define ctlpgetchpredstate(x) SignalProfiler::GetChannelSignalPredicationState(x)
#define ctlpgetchcolor(x,y) SignalProfiler::GetChannelCodedColor((x),(y))
#define ctlpgetchname(x,y) SignalProfiler::GetChannelCodedName((x),(y))
#else
// Profiling compiled out. ctlpon() is a predicate, so it must still expand to an
// expression: it previously expanded to nothing, which made "if(ctlpon())" ill-formed in
// this configuration. It now yields FALSE like the other query macros below.
#define ctlpon() (FALSE)
// NOTE(review): ctlptimer() still expands to nothing here, so any expression that uses it
// must itself be guarded by PROFILE_CONTROLSIGNALS.
#define ctlptimer()
#define ctlpdeclegressctr()
#define ctlpingress(l,b)
#define ctlpegress(l,b)
#define ctlpopegress(l,b)
#define ctlpcondingress(c,l,b)
#define ctlpcondegress(c,l,b)
#define ctlpwasactive(x) FALSE
#define ctlpwasbalanced(x) FALSE
#define ctlpwassuppresed(x) FALSE
#define ctlphasrelevantpredicate(x) FALSE
#define ctlpgetchactstate(x) cas_none
#define ctlpgetchpredstate(x) cps_na
#define ctlpgetchcolor(x,y) "gray60"
#define ctlpgetchname(x,y) "channel"
#endif
};
};
#endif

Просмотреть файл

@ -1,13 +0,0 @@
//--------------------------------------------------------------------------------------
// File: symbiostypes.h
//
// Maintainer: crossbac@microsoft.com
//--------------------------------------------------------------------------------------
#ifndef _SYMBIOS_TYPES_H_
#define _SYMBIOS_TYPES_H_
// #include <d3dcommon.h>
//#include "ptdxhdr.h"
//#include "accelerator.h"
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,311 +0,0 @@
///-------------------------------------------------------------------------------------------------
// file: taskprofiler.h
//
// summary: Declares the taskprofiler class
///-------------------------------------------------------------------------------------------------
#ifndef _TASK_PROFILER_H_
#define _TASK_PROFILER_H_
#include "primitive_types.h"
#include <vector>
#include <map>
#include <set>
#include <string>
#include <sstream>
class CHighResolutionTimer;
class CSharedPerformanceTimer;
namespace PTask {
class Task;
class TaskProfile {
public:
///-------------------------------------------------------------------------------------------------
/// <summary> Default constructor. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <param name="pTask"> [in,out] If non-null, the task. </param>
///-------------------------------------------------------------------------------------------------
TaskProfile(Task * pTask);
///-------------------------------------------------------------------------------------------------
/// <summary> Destructor. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
virtual ~TaskProfile();
///-------------------------------------------------------------------------------------------------
/// <summary> Print migration stats. </summary>
///
/// <remarks> Crossbac, 1/11/2012. </remarks>
///
/// <param name="ss"> [in,out] The ss. </param>
///-------------------------------------------------------------------------------------------------
static void MigrationReport(std::ostream& ss);
/// <summary> The task. </summary>
Task * m_pTask;
/// <summary> The dispatch accelerator history. key is the dispatch
/// number, value is the accelerator upon which the dispatch
/// took place. </summary>
std::map<UINT, UINT> m_vDispatchAcceleratorHistory;
/// <summary> The dependent dispatch accelerator history. key is the dispatch
/// number, value is the accelerator used in the dependent binding.
/// Note that this object model assumes 1 depacc binding per task,
/// which is less general than what much of the code appears to allow
/// (in terms of binding cardinality, heterogeneity), but is in line with
/// the defacto limitations on dependent bindings at present.
/// </summary>
std::map<UINT, UINT> m_vDependentAcceleratorHistory;
/// <summary>   Name-keyed views of the per-phase timestamp maps declared below.
///             NOTE(review): the mapped type is a *reference* to a map; confirm this is
///             intentional and accepted by every toolchain this header must build with.
///             Presumably each entry aliases one of the m_vEnter*/m_vExit* members so
///             reporting code can iterate phases by name -- confirm. </summary>
std::map<std::string,
std::map<int,
std::vector<double>*>&> m_vEnterProfileMap;
std::map<std::string,
std::map<int,
std::vector<double>*>&> m_vExitProfileMap;
/// <summary>   Per-phase "enter" timestamps. One map per profiled phase X, filled by the
///             tpprofile_enter(X) macro: the key is the task's dispatch number
///             (m_nDispatchNumber), the value is a heap-allocated vector to which each
///             timer reading taken on entry to the phase is appended. The vectors are
///             owned by this object and released by tpprofile_destroy(X). </summary>
std::map<int, std::vector<double>*> m_vEnterAcquireDispatchResourceLocks;
std::map<int, std::vector<double>*> m_vEnterReleaseDispatchResourceLocks;
std::map<int, std::vector<double>*> m_vEnterMigrateInputs;
std::map<int, std::vector<double>*> m_vEnterAssembleIOLockList;
std::map<int, std::vector<double>*> m_vEnterSchedule;
std::map<int, std::vector<double>*> m_vEnterBlockedOnReadyQ;
std::map<int, std::vector<double>*> m_vEnterBlockedNotReady;
std::map<int, std::vector<double>*> m_vEnterPropagateDataflow;
std::map<int, std::vector<double>*> m_vEnterReleaseInflightDatablocks;
std::map<int, std::vector<double>*> m_vEnterRIBMaterializeViews;
std::map<int, std::vector<double>*> m_vEnterRIBSyncHost;
std::map<int, std::vector<double>*> m_vEnterBindMetaPorts;
std::map<int, std::vector<double>*> m_vEnterDispatch;
std::map<int, std::vector<double>*> m_vEnterPSDispatch;
std::map<int, std::vector<double>*> m_vEnterBindConstants;
std::map<int, std::vector<double>*> m_vEnterBindOutputs;
std::map<int, std::vector<double>*> m_vEnterBindInputs;
std::map<int, std::vector<double>*> m_vEnterAssignDependentAccelerator;
std::map<int, std::vector<double>*> m_vEnterDispatchTeardown;
/// <summary>   Per-phase "exit" timestamps; same layout and ownership as the enter maps
///             above, filled by the tpprofile_exit(X) macro. </summary>
std::map<int, std::vector<double>*> m_vExitAcquireDispatchResourceLocks;
std::map<int, std::vector<double>*> m_vExitReleaseDispatchResourceLocks;
std::map<int, std::vector<double>*> m_vExitMigrateInputs;
std::map<int, std::vector<double>*> m_vExitAssembleIOLockList;
std::map<int, std::vector<double>*> m_vExitSchedule;
std::map<int, std::vector<double>*> m_vExitBlockedOnReadyQ;
std::map<int, std::vector<double>*> m_vExitBlockedNotReady;
std::map<int, std::vector<double>*> m_vExitPropagateDataflow;
std::map<int, std::vector<double>*> m_vExitReleaseInflightDatablocks;
std::map<int, std::vector<double>*> m_vExitRIBMaterializeViews;
std::map<int, std::vector<double>*> m_vExitRIBSyncHost;
std::map<int, std::vector<double>*> m_vExitBindMetaPorts;
std::map<int, std::vector<double>*> m_vExitDispatch;
std::map<int, std::vector<double>*> m_vExitPSDispatch;
std::map<int, std::vector<double>*> m_vExitBindConstants;
std::map<int, std::vector<double>*> m_vExitBindOutputs;
std::map<int, std::vector<double>*> m_vExitBindInputs;
std::map<int, std::vector<double>*> m_vExitAssignDependentAccelerator;
std::map<int, std::vector<double>*> m_vExitDispatchTeardown;
CRITICAL_SECTION m_csTiming;
static UINT m_nMetrics;
static std::map<std::string, std::string> m_vMetricNickNames;
static std::map<UINT, std::string> m_vMetricOrder;
static std::stringstream m_ssTaskStats;
static std::stringstream m_ssTaskDispatchHistory;
static CRITICAL_SECTION m_csTaskProfiler;
static BOOL m_bProfilerOutputTabular;
static BOOL m_bTaskProfilerInit;
static CSharedPerformanceTimer * m_pGlobalProfileTimer;
static ULONG m_nInputBindEvents;
static ULONG m_nInputMigrations;
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the task profiling. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <param name="bTabular"> true to tabular. </param>
///-------------------------------------------------------------------------------------------------
static void Initialize(BOOL bTabular=TRUE);
///-------------------------------------------------------------------------------------------------
/// <summary> Deinitialize task profiling. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
static void Deinitialize();
///-------------------------------------------------------------------------------------------------
/// <summary> Dumps a task profile statistics. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <param name="ss"> [in,out] The ss. </param>
///-------------------------------------------------------------------------------------------------
static void Report(std::ostream& ss);
///-------------------------------------------------------------------------------------------------
/// <summary> Merge task instance statistics. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void MergeTaskInstanceStatistics();
///-------------------------------------------------------------------------------------------------
/// <summary> Initializes the task instance profiling. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void InitializeInstanceProfile();
///-------------------------------------------------------------------------------------------------
/// <summary> Deinitialize task instance profiling. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///-------------------------------------------------------------------------------------------------
void DeinitializeInstanceProfile();
///-------------------------------------------------------------------------------------------------
/// <summary> Dumps a task instance profile statistics. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <param name="ss"> [in,out] The ss. </param>
///-------------------------------------------------------------------------------------------------
void DumpTaskProfile(std::ostream& ss);
///-------------------------------------------------------------------------------------------------
/// <summary> Gets task dispatch history. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <returns> null if it fails, else the task dispatch history. </returns>
///-------------------------------------------------------------------------------------------------
std::stringstream* GetDispatchHistory();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets task instance profile statistics. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <returns> null if it fails, else the task instance profile statistics. </returns>
///-------------------------------------------------------------------------------------------------
std::stringstream* GetTaskProfile();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets task instance profile statistics columnar. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <returns> null if it fails, else the task instance profile statistics columnar. </returns>
///-------------------------------------------------------------------------------------------------
std::stringstream* GetTaskProfileColumnar();
///-------------------------------------------------------------------------------------------------
/// <summary> Gets task instance profile statistics tabular. </summary>
///
/// <remarks> Crossbac, 7/17/2013. </remarks>
///
/// <returns> null if it fails, else the task instance profile statistics tabular. </returns>
///-------------------------------------------------------------------------------------------------
std::stringstream* GetTaskProfileTabular();
// Dispatch logging helpers. They are written to be expanded inside member functions of
// the task class: they refer to that class's members by unqualified name
// (m_pTaskProfile, m_pDispatchAccelerator, m_lpszTaskName, m_nDispatchNumber,
// m_vDependentAcceleratorAssignments, GetDependentBindingClassCount()).
#if (defined(GRAPH_DIAGNOSTICS) || defined(PROFILE_TASKS))
// log_dispacc(x,y,z,b): if the task has a profile object, record accelerator id y for
// dispatch number x and, when b is true, dependent accelerator id z as well.
#define log_dispacc(x,y,z,b) { if(m_pTaskProfile) { \
m_pTaskProfile->m_vDispatchAcceleratorHistory[x] = y; \
if(b) { m_pTaskProfile->m_vDependentAcceleratorHistory[x] = z; } } }
// Shorthands for the runtime's dispatch tracer and its enable switch.
#define PTR_LD Runtime::Tracer::LogDispatchEvent
#define PTR_EN() Runtime::GetDispatchTracingEnabled()
// Id of the accelerator this dispatch runs on.
#define dispaccid() m_pDispatchAccelerator->GetAcceleratorId()
// True when the task has at least one dependent binding class.
#define hasdepacc() (GetDependentBindingClassCount()!=0)
// Id of the first accelerator of the first dependent assignment, or 0 when there is no
// dependent binding (the hasdepacc() test guards the begin()/at(0) accesses).
#define depaccid() ((hasdepacc())?(m_vDependentAcceleratorAssignments.begin()->second->at(0)->GetAcceleratorId()):0)
// log_dispatch(bEnter): emit a tracer event when dispatch tracing is enabled; on entry
// only, also record the accelerator history through log_dispacc.
#define log_dispatch(bEnter) { if(PTR_EN()) { PTR_LD(m_lpszTaskName, (bEnter), dispaccid(), m_nDispatchNumber); } \
if(bEnter) { log_dispacc(m_nDispatchNumber, dispaccid(), depaccid(), hasdepacc()); } }
#define log_dispatch_enter() log_dispatch(TRUE)
#define log_dispatch_exit() log_dispatch(FALSE)
#else
// Neither GRAPH_DIAGNOSTICS nor PROFILE_TASKS: the entry points compile away.
#define log_dispatch_enter()
#define log_dispatch_exit()
#endif
#ifdef PROFILE_TASKS
// NOTE(review): this disables warning C4127 for everything that follows in any
// translation unit including this header; there is no matching push/pop here.
#pragma warning(disable:4127)
// tpon(): true when task profiling is switched on at runtime and this task has a
// profile object.
#define tpon() (PTask::Runtime::GetTaskProfileMode()&&(m_pTaskProfile!=NULL))
// tptimer(): the global profile timer reached through the task's profile object; only
// dereference it after tpon() has been checked.
#define tptimer() (m_pTaskProfile->m_pGlobalProfileTimer)
// tpqtimer(): current reading of the profile timer, or 0.0 when profiling is off.
#define tpqtimer() (tpon()?tptimer()->elapsed(false):0.0)
// tpprofile_enter(x): sample the profile timer into the local dTPStart_<x> (declared in
// the scope of the call site) and, when profiling is on, append that sample to the
// vector stored under the current dispatch number in m_pTaskProfile->m_vEnter<x>,
// allocating the vector the first time a dispatch number is seen.
#define tpprofile_enter(x) \
    double dTPStart_##x = tpqtimer(); \
    if(tpon()) { \
        std::map<int, std::vector<double>*>& rEnterMap_##x = m_pTaskProfile->m_vEnter##x; \
        std::map<int, std::vector<double>*>::iterator itEnter_##x = \
            rEnterMap_##x.find(m_nDispatchNumber); \
        if(itEnter_##x == rEnterMap_##x.end()) { \
            std::vector<double>* pSamples_##x = new std::vector<double>(); \
            pSamples_##x->push_back(dTPStart_##x); \
            rEnterMap_##x[m_nDispatchNumber] = pSamples_##x; \
        } else { \
            itEnter_##x->second->push_back(dTPStart_##x); \
        } \
    }
// tpprofile_exit(x): sample the profile timer into the local dTPExit_<x> (declared in
// the scope of the call site) and, when profiling is on, append that sample to the
// vector stored under the current dispatch number in m_pTaskProfile->m_vExit<x>,
// allocating the vector the first time a dispatch number is seen.
#define tpprofile_exit(x) \
    double dTPExit_##x = tpqtimer(); \
    if(tpon()) { \
        std::map<int, std::vector<double>*>& rExitMap_##x = m_pTaskProfile->m_vExit##x; \
        std::map<int, std::vector<double>*>::iterator itExit_##x = \
            rExitMap_##x.find(m_nDispatchNumber); \
        if(itExit_##x == rExitMap_##x.end()) { \
            std::vector<double>* pSamples_##x = new std::vector<double>(); \
            pSamples_##x->push_back(dTPExit_##x); \
            rExitMap_##x[m_nDispatchNumber] = pSamples_##x; \
        } else { \
            itExit_##x->second->push_back(dTPExit_##x); \
        } \
    }
// tpprofile_destroy(x): release every sample vector held by m_vExit<x> and m_vEnter<x>
// and empty both maps. Meant to be expanded inside a TaskProfile member function, since
// the maps are named without qualification. Null entries are harmless: deleting a null
// pointer does nothing.
#define tpprofile_destroy(x) \
    { \
        typedef std::map<int, std::vector<double>*>::iterator TPIter_##x; \
        for(TPIter_##x itExit_##x = m_vExit##x.begin(); \
            itExit_##x != m_vExit##x.end(); \
            ++itExit_##x) { \
            delete itExit_##x->second; \
        } \
        for(TPIter_##x itEnter_##x = m_vEnter##x.begin(); \
            itEnter_##x != m_vEnter##x.end(); \
            ++itEnter_##x) { \
            delete itEnter_##x->second; \
        } \
        m_vEnter##x.clear(); \
        m_vExit##x.clear(); \
    }
#else
#define tpprofile_enter(x)
#define tpprofile_exit(x)
#define tpprofile_init_map(x)
#define tpprofile_init_map_nickname(a,x,y)
#define tpprofile_destroy(x)
#endif
};
};
#endif

Просмотреть файл

@ -1,45 +0,0 @@
@echo off
rem ---------------------------------------------------------------------------
rem Refreshes the PTask artifacts kept in this directory from a local PTask
rem build under %DANDELION_ROOT%:
rem   1. checks out the existing files with TFS (tf checkout),
rem   2. copies the x64 Release/Debug ptask.lib and ptask.pdb plus the public
rem      headers over them,
rem   3. adds any new files to source control (tf add),
rem   4. prints the manual steps needed to complete the check-in.
rem Must be run from the directory that contains version.txt.
rem
rem The DANDELION_ROOT test and the copy sources are quoted so that a root
rem path containing spaces neither breaks the "if" nor splits the copy
rem arguments.
rem ---------------------------------------------------------------------------
if not exist version.txt (
    echo version.txt not found in current directory. Please run from \Common\PTask in your CNTK tree. Exiting.
    goto:end
)
if "%DANDELION_ROOT%" == "" (
    echo DANDELION_ROOT environment variable must be set. Exiting.
    goto:end
)
echo Checking out existing PTask files...
tf checkout . /r
echo Copying PTask release artifacts from %DANDELION_ROOT% ...
copy /Y "%DANDELION_ROOT%\ptask\ptask\bin\x64\Release\ptask.lib" lib\Release\ptask.lib
copy /Y "%DANDELION_ROOT%\ptask\ptask\bin\x64\Release\ptask.pdb" lib\Release\ptask.pdb
copy /Y "%DANDELION_ROOT%\ptask\ptask\bin\x64\Debug\ptask.lib" lib\Debug\ptask.lib
copy /Y "%DANDELION_ROOT%\ptask\ptask\bin\x64\Debug\ptask.pdb" lib\Debug\ptask.pdb
copy /Y "%DANDELION_ROOT%\ptask\ptask\*.h" include
echo Making sure any new files are added to the repository...
tf add . /r
echo.
echo ** Safe to ignore any warnings above about items already having pending changes **
echo.
echo Once you are ready to check in an update to PTask, perfrom the following steps:
echo.
echo // Note a timestamp that the PTask repository could be rolled back to to re-build this version of PTask.
echo notepad version.txt
echo.
echo // Perform checkin - automatically omits any files identical to their latest checked in version.
echo tf checkin
echo.
echo // View the contents of the checkin.
echo tf changeset nnnnn
echo.
echo // Check which files are still checked out.
echo tf status . /r
echo.
echo // Revert any files still checked out.
echo tf undo . /r
:end

Просмотреть файл

@ -1,13 +0,0 @@
Current
Version from PTask git repository at 17:30 on 7/8/2014.
Previous
Version from PTask git repository at 23:00 on 6/20/2014.
Version from MSR-SV Source Depot at 17:00 on 6/10/2014.
Version from MSR-SV Source Depot at 10:00 on 6/4/2014.
Version from MSR-SV Source Depot at 16:00 on 5/21/2014.
Version from MSR-SV Source Depot at 01:45 on 5/16/2014.
Version from MSR-SV Source Depot at 22:10 on 3/21/2014.
Version from MSR-SV Source Depot at 22:00 on 2/25/2014.
Version from MSR-SV Source Depot at 16:45 on 12/18/2013.
Version from MSR-SV Source Depot at 11:00 on 11/12/2013.

Просмотреть файл

@ -394,6 +394,7 @@ private:
size_t m_epochStartSample; // the starting sample for the epoch
size_t m_totalSamples; // number of samples in the dataset
bool m_partialMinibatch; // a partial minibatch is allowed
MBLayoutPtr m_pMBLayout;
int m_traceLevel;
vector<SectionFile*> m_secFiles;
@ -414,14 +415,14 @@ private:
public:
virtual void Init(const ConfigParameters& config);
virtual void Destroy();
BinaryReader() { }
BinaryReader() : m_pMBLayout(make_shared<MBLayout>()) { }
virtual ~BinaryReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices);
size_t GetNumParallelSequences() { return 1 ;}
void SetNumParallelSequences(const size_t) { };
void CopyMBLayoutTo(MBLayoutPtr) {};
void CopyMBLayoutTo(MBLayoutPtr pMBLayout) { pMBLayout->CopyFrom(m_pMBLayout); NOT_IMPLEMENTED; }
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<typename BinaryReader<ElemType>::LabelIdType, typename BinaryReader<ElemType>::LabelType>& labelMapping);
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);

Просмотреть файл

@ -93,6 +93,7 @@ private:
bool m_partialMinibatch; // a partial minibatch is allowed
LabelKind m_labelType; // labels are categories, create mapping table
msra::dbn::randomordering m_randomordering; // randomizing class
MBLayoutPtr m_pMBLayout;
std::wstring m_labelsName;
std::wstring m_featuresName;
@ -136,14 +137,14 @@ private:
public:
virtual void Init(const ConfigParameters& config);
virtual void Destroy();
DSSMReader() { m_qfeaturesBuffer = NULL; m_dfeaturesBuffer = NULL; m_labelsBuffer = NULL; }
DSSMReader() : m_pMBLayout(make_shared<MBLayout>()) { m_qfeaturesBuffer = NULL; m_dfeaturesBuffer = NULL; m_labelsBuffer = NULL; }
virtual ~DSSMReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices);
size_t GetNumParallelSequences() { return 1 ;}
void SetNumParallelSequences(const size_t) { };
void CopyMBLayoutTo(MBLayoutPtr) {};
void CopyMBLayoutTo(MBLayoutPtr pMBLayout) { pMBLayout->CopyFrom(m_pMBLayout); NOT_IMPLEMENTED; }
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, typename LabelType>& labelMapping);

Просмотреть файл

@ -17,10 +17,6 @@
#include "msra_mgram.h" // for unigram scores of ground-truth path in sequence training
#include "rollingwindowsource.h" // minibatch sources
#include "utterancesource.h"
#ifdef _WIN32
#include "readaheadsource.h"
#endif
#include "chunkevalsource.h"
#define DATAREADER_EXPORTS
#include "DataReader.h"

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -19,19 +19,14 @@ private:
const static size_t m_htkRandomizeAuto = 0;
const static size_t m_htkRandomizeDisable = (size_t)-1;
msra::dbn::minibatchiterator* m_mbiter;
msra::dbn::minibatchsource* m_frameSource;
#ifdef _WIN32
msra::dbn::minibatchreadaheadsource* m_readAheadSource;
#endif
msra::dbn::FileEvalSource* m_fileEvalSource;
msra::dbn::latticesource* m_lattices;
map<wstring,msra::lattices::lattice::htkmlfwordsequence> m_latticeMap;
unique_ptr<msra::dbn::minibatchiterator> m_mbiter;
unique_ptr<msra::dbn::minibatchsource> m_frameSource;
unique_ptr<msra::dbn::FileEvalSource> m_fileEvalSource;
unique_ptr<msra::dbn::latticesource> m_lattices;
map<wstring, msra::lattices::lattice::htkmlfwordsequence> m_latticeMap;
vector<bool> m_sentenceEnd;
bool m_readAhead;
bool m_truncated;
bool m_fullutt; //read full utterance every time
bool m_framemode;
vector<size_t> m_processedFrame;
intargvector m_numberOfuttsPerMinibatchForAllEpochs;
@ -40,9 +35,9 @@ private:
size_t m_mbSize;
vector<size_t> m_toProcess;
vector<size_t> m_switchFrame;
vector<size_t> m_validFrame; //valid frame number in each channel
vector<size_t> m_extraUttsPerMinibatch;
size_t m_extraUttNum;
vector<size_t> m_validFrame; //valid frame number in each channel
vector<size_t> m_extraUttsPerMinibatch;
size_t m_extraUttNum;
bool m_noData;
bool m_trainOrTest; // if false, in file writing mode
using LabelType = typename IDataReader<ElemType>::LabelType;
@ -52,33 +47,36 @@ private:
bool m_partialMinibatch; // allow partial minibatches?
std::vector<ElemType*> m_featuresBufferMultiUtt;
std::vector<std::shared_ptr<ElemType>> m_featuresBufferMultiUtt;
std::vector<size_t> m_featuresBufferAllocatedMultiUtt;
std::vector<ElemType*> m_labelsBufferMultiUtt;
std::vector<std::shared_ptr<ElemType>> m_labelsBufferMultiUtt;
std::vector<size_t> m_labelsBufferAllocatedMultiUtt;
std::vector<size_t> m_featuresStartIndexMultiUtt;
std::vector<size_t> m_labelsStartIndexMultiUtt;
CUDAPageLockedMemAllocator* m_cudaAllocator;
unique_ptr<CUDAPageLockedMemAllocator> m_cudaAllocator;
std::vector<std::shared_ptr<ElemType>> m_featuresBufferMultiIO;
std::vector<size_t> m_featuresBufferAllocatedMultiIO;
std::vector<std::shared_ptr<ElemType>> m_labelsBufferMultiIO;
std::vector<size_t> m_labelsBufferAllocatedMultiIO;
//for lattice uids and phoneboundaries
std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> m_latticeBufferMultiUtt;
std::vector<std::vector<size_t>> m_labelsIDBufferMultiUtt;
std::vector<std::vector<size_t>> m_phoneboundaryIDBufferMultiUtt;
std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> m_extraLatticeBufferMultiUtt;
std::vector<std::vector<size_t>> m_extraLabelsIDBufferMultiUtt;
std::vector<std::vector<size_t>> m_extraPhoneboundaryIDBufferMultiUtt;
//hmm
msra::asr::simplesenonehmm m_hset;
//for lattice uids and phoneboundaries
std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> m_latticeBufferMultiUtt;
std::vector<std::vector<size_t>> m_labelsIDBufferMultiUtt;
std::vector<std::vector<size_t>> m_phoneboundaryIDBufferMultiUtt;
std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> m_extraLatticeBufferMultiUtt;
std::vector<std::vector<size_t>> m_extraLabelsIDBufferMultiUtt;
std::vector<std::vector<size_t>> m_extraPhoneboundaryIDBufferMultiUtt;
//hmm
msra::asr::simplesenonehmm m_hset;
std::map<std::wstring,size_t> m_featureNameToIdMap;
std::map<std::wstring,size_t> m_labelNameToIdMap;
std::map<std::wstring,size_t> m_nameToTypeMap;
std::map<std::wstring,size_t> m_featureNameToDimMap;
std::map<std::wstring,size_t> m_labelNameToDimMap;
// for writing outputs to files (standard single input/output network) - deprecate eventually
bool m_checkDictionaryKeys;
bool m_convertLabelsToTargets;
@ -95,8 +93,8 @@ private:
void PrepareForWriting(const ConfigParameters& config);
bool GetMinibatchToTrainOrTest(std::map<std::wstring, Matrix<ElemType>*>&matrices);
bool GetMinibatch4SEToTrainOrTest(std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> & latticeinput, vector<size_t> &uids, vector<size_t> &boundaries, std::vector<size_t> &extrauttmap);
void fillOneUttDataforParallelmode(std::map<std::wstring, Matrix<ElemType>*>& matrices, size_t startFr, size_t framenum, size_t channelIndex, size_t sourceChannelIndex);
bool GetMinibatch4SEToTrainOrTest(std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> & latticeinput, vector<size_t> &uids, vector<size_t> &boundaries, std::vector<size_t> &extrauttmap);
void fillOneUttDataforParallelmode(std::map<std::wstring, Matrix<ElemType>*>& matrices, size_t startFr, size_t framenum, size_t channelIndex, size_t sourceChannelIndex);
bool GetMinibatchToWrite(std::map<std::wstring, Matrix<ElemType>*>&matrices);
void StartMinibatchLoopToTrainOrTest(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize);
@ -104,11 +102,11 @@ private:
bool ReNewBufferForMultiIO(size_t i);
size_t GetNumParallelSequences() { return m_numberOfuttsPerMinibatch; }
size_t GetNumParallelSequences();
void SetNumParallelSequences(const size_t) { };
void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels,
std::vector<std::wstring>& hmms, std::vector<std::wstring>& lattices);
void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels,
std::vector<std::wstring>& hmms, std::vector<std::wstring>& lattices);
size_t ReadLabelToTargetMappingFile (const std::wstring& labelToTargetMappingFile, const std::wstring& labelListFile, std::vector<std::vector<ElemType>>& labelToTargetMap);
@ -122,42 +120,9 @@ private:
};
private:
CUDAPageLockedMemAllocator* GetCUDAAllocator(int deviceID)
{
if (m_cudaAllocator != nullptr)
{
if (m_cudaAllocator->GetDeviceId() != deviceID)
{
delete m_cudaAllocator;
m_cudaAllocator = nullptr;
}
}
if (m_cudaAllocator == nullptr)
{
m_cudaAllocator = new CUDAPageLockedMemAllocator(deviceID);
}
return m_cudaAllocator;
}
std::shared_ptr<ElemType> AllocateIntermediateBuffer(int deviceID, size_t numElements)
{
if (deviceID >= 0)
{
// Use pinned memory for GPU devices for better copy performance
size_t totalSize = sizeof(ElemType) * numElements;
return std::shared_ptr<ElemType>((ElemType*)GetCUDAAllocator(deviceID)->Malloc(totalSize), [this, deviceID](ElemType* p) {
this->GetCUDAAllocator(deviceID)->Free((char*)p);
});
}
else
{
return std::shared_ptr<ElemType>(new ElemType[numElements], [](ElemType* p) {
delete[] p;
});
}
}
// Helper functions
unique_ptr<CUDAPageLockedMemAllocator>& GetCUDAAllocator(int deviceID);
std::shared_ptr<ElemType> AllocateIntermediateBuffer(int deviceID, size_t numElements);
public:
MBLayoutPtr m_pMBLayout;
@ -173,8 +138,8 @@ public:
{
}
virtual void Init(const ConfigParameters& config);
virtual void Destroy() {delete this;}
virtual ~HTKMLFReader();
// Destroys this reader by deleting the object itself; the object must not be
// used after this call returns.
virtual void Destroy()
{
    delete this;
}
virtual ~HTKMLFReader() { }
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize)
{
@ -192,8 +157,8 @@ public:
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, LabelType>& labelMapping);
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
virtual bool GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> & latticeinput, vector<size_t> &uids, vector<size_t> &boundaries, vector<size_t> &extrauttmap);
virtual bool GetHmmData(msra::asr::simplesenonehmm * hmm);
virtual bool GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> & latticeinput, vector<size_t> &uids, vector<size_t> &boundaries, vector<size_t> &extrauttmap);
virtual bool GetHmmData(msra::asr::simplesenonehmm * hmm);
virtual bool DataEnd(EndDataType endDataType);
void CopyMBLayoutTo(MBLayoutPtr);

Просмотреть файл

@ -108,23 +108,12 @@
<ClInclude Include="htkfeatio.h" />
<ClInclude Include="HTKMLFReader.h" />
<ClInclude Include="HTKMLFWriter.h" />
<ClInclude Include="latticearchive.h" />
<ClInclude Include="latticestorage.h" />
<ClInclude Include="minibatchiterator.h" />
<ClInclude Include="minibatchsourcehelpers.h" />
<ClInclude Include="msra_mgram.h" />
<ClInclude Include="numahelpers.h" />
<ClInclude Include="pplhelpers.h" />
<ClInclude Include="readaheadsource.h" />
<ClInclude Include="rollingwindowsource.h" />
<ClInclude Include="simplesenonehmm.h" />
<ClInclude Include="simplethread.h" />
<ClInclude Include="simple_checked_arrays.h" />
<ClInclude Include="ssefloat4.h" />
<ClInclude Include="ssematrix.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
<ClInclude Include="utterancesource.h" />
<ClInclude Include="utterancesourcemulti.h" />
</ItemGroup>
<ItemGroup>

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше