Adds documentation and tests for min/max intrinsics. (#81)

Improves XML-based tests to allow existing devices to be used. Improves dxexp to print out SM6/wave/i64 support under experimental mode. Bumps the hctversion to 0.7.
2017-02-17 15:44:30 -08:00 · 2017-02-17 15:44:30 -08:00 · 7bdef32133
--- a/tools/clang/test/HLSL/ShaderOpArith.xml
+++ b/tools/clang/test/HLSL/ShaderOpArith.xml
@ -19,6 +19,31 @@
    ]]>
    </Shader>
  </ShaderOp>
+  <ShaderOp Name="MinMax" CS="CS" DispatchX="10" DispatchY="10">
+    <RootSignature>RootFlags(0), UAV(u0)</RootSignature>
+    <Resource Name="SPrimitives" Dimension="BUFFER" Width="1024" Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" TransitionTo="UNORDERED_ACCESS" Init="ByName" ReadBack="true" />
+    <RootValues>
+      <RootValue Index="0" ResName="SPrimitives" />
+    </RootValues>
+    <Shader Name="CS" Target="cs_6_0">
+      <![CDATA[
+      // Compute shader for the min/max intrinsic test: each thread loads one
+      // element from the SPrimitives UAV, evaluates min() and max() on its two
+      // inputs, and stores the element back so the host can read the results.
+      //
+      // NOTE(review): the host test fills 100 elements of 16 bytes (1600 bytes)
+      // while the resource declares Width="1024" - presumably the ByName
+      // initializer resizes the resource; confirm.
+      // NOTE(review): elements are indexed by SV_GroupIndex, so with
+      // DispatchX=10 and DispatchY=10 every group appears to process the same
+      // 100 elements - confirm a single group would not be sufficient.
+
+      // One test case: two inputs followed by the two shader-written outputs.
+      struct SMinMaxElem {
+        float f_fa;
+        float f_fb;
+        float f_fmin_o;
+        float f_fmax_o;
+      };
+    RWStructuredBuffer<SMinMaxElem> g_buf : register(u0);
+    [numthreads(10,10,1)]
+    void main(uint GI : SV_GroupIndex) {
+      // Work on a local copy and write the whole element back in one store.
+      SMinMaxElem l = g_buf[GI];
+      l.f_fmin_o = min(l.f_fa, l.f_fb);
+      l.f_fmax_o = max(l.f_fa, l.f_fb);
+      g_buf[GI] = l;
+    };
+    ]]>
+    </Shader>
+  </ShaderOp>
  <ShaderOp Name="OOB" PS="PS" VS="VS">
    <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2))</RootSignature>
    <Resource Name="CB0" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" TransitionTo="VERTEX_AND_CONSTANT_BUFFER">
--- a/tools/clang/unittests/HLSL/ExecutionTest.cpp
+++ b/tools/clang/unittests/HLSL/ExecutionTest.cpp
@ -194,6 +194,7 @@ public:
  TEST_METHOD(BasicComputeTest);
  TEST_METHOD(BasicTriangleTest);
  TEST_METHOD(BasicTriangleOpTest);
+  TEST_METHOD(MinMaxTest);
  TEST_METHOD(OutOfBoundsTest);
  TEST_METHOD(SaturateTest);
  TEST_METHOD(SignTest);
@ -617,6 +618,18 @@ public:
    }
  }

+  // Loads the HLSL test data file named by relativePath and returns a
+  // read-only stream over its contents through ppStream. The caller receives
+  // the stream reference detached from the local smart pointer. Each step is
+  // checked with VERIFY_SUCCEEDED.
+  void ReadHlslDataIntoNewStream(LPCWSTR relativePath, IStream **ppStream) {
+    VERIFY_SUCCEEDED(m_support.Initialize());
+    const std::wstring fullPath = GetPathToHlslDataFile(relativePath);
+    CComPtr<IDxcLibrary> pDxcLib;
+    VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcLibrary, &pDxcLib));
+    CComPtr<IDxcBlobEncoding> pFileBlob;
+    VERIFY_SUCCEEDED(pDxcLib->CreateBlobFromFile(fullPath.c_str(), nullptr, &pFileBlob));
+    CComPtr<IStream> pResult;
+    VERIFY_SUCCEEDED(pDxcLib->CreateStreamFromBlobReadOnly(pFileBlob, &pResult));
+    *ppStream = pResult.Detach();
+  }
+
  void RecordRenderAndReadback(ID3D12GraphicsCommandList *pList,
                               ID3D12DescriptorHeap *pRtvHeap,
                               UINT rtvDescriptorSize,
@ -825,9 +838,9 @@ TEST_F(ExecutionTest, BasicComputeTest) {
  static const int DispatchGroupCount = 1;

  CComPtr<ID3D12Device> pDevice;
-
  if (!CreateDevice(&pDevice))
    return;
+
  std::vector<uint32_t> values;
  SetupComputeValuePattern(values, ThreadsPerGroup * DispatchGroupCount);
  VERIFY_ARE_EQUAL(values[0], 0);
@ -1016,9 +1029,9 @@ TEST_F(ExecutionTest, Int64Test) {
  static const int DispatchGroupCount = 1;

  CComPtr<ID3D12Device> pDevice;
-
  if (!CreateDevice(&pDevice))
    return;
+
  if (!DoesDeviceSupportInt64(pDevice)) {
    // Optional feature, so it's correct to not support it if declared as such.
    WEX::Logging::Log::Comment(L"Device does not support int64 operations.");
@ -1047,9 +1060,9 @@ TEST_F(ExecutionTest, SignTest) {
  static const int DispatchGroupCount = 1;

  CComPtr<ID3D12Device> pDevice;
-
  if (!CreateDevice(&pDevice))
    return;
+
  std::vector<uint32_t> values = { (uint32_t)-3, (uint32_t)-2, (uint32_t)-1, 0, 1, 2, 3, 4};
  RunRWByteBufferComputeTest(pDevice, pShader, values);
  VERIFY_ARE_EQUAL(values[0], -1);
@ -1159,9 +1172,9 @@ TEST_F(ExecutionTest, WaveIntrinsicsTest) {
  static const int DispatchGroupCount = 1;

  CComPtr<ID3D12Device> pDevice;
-
  if (!CreateDevice(&pDevice))
    return;
+
  if (!DoesDeviceSupportWaveOps(pDevice)) {
    // Optional feature, so it's correct to not support it if declared as such.
    WEX::Logging::Log::Comment(L"Device does not support wave operations.");
@ -1809,7 +1822,8 @@ static float g_SinCosFloats[] = {
 };

 std::shared_ptr<ShaderOpTestResult>
-RunShaderOpTest(dxc::DxcDllSupport &support, IStream *pStream, LPCSTR pName,
+RunShaderOpTest(ID3D12Device *pDevice, dxc::DxcDllSupport &support,
+                IStream *pStream, LPCSTR pName,
                st::ShaderOpTest::TInitCallbackFn pInitCallback) {
  DXASSERT_NOMSG(pStream != nullptr);
  std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
@ -1860,18 +1874,15 @@ static bool isdenorm(double d) {

 TEST_F(ExecutionTest, DoShaderOpArithTest) {
  WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
-  VERIFY_SUCCEEDED(m_support.Initialize());
-  CComPtr<IDxcCompiler> pCompiler;
-  CComPtr<IDxcLibrary> pLibrary;
-  CComPtr<IDxcBlobEncoding> pBlob;
  CComPtr<IStream> pStream;
-  std::wstring path = GetPathToHlslDataFile(L"ShaderOpArith.xml");
-  VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcLibrary, &pLibrary));
-  VERIFY_SUCCEEDED(pLibrary->CreateBlobFromFile(path.c_str(), nullptr, &pBlob));
-  VERIFY_SUCCEEDED(pLibrary->CreateStreamFromBlobReadOnly(pBlob, &pStream));
+  ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream);
+
+  CComPtr<ID3D12Device> pDevice;
+  if (!CreateDevice(&pDevice))
+    return;

  // Single operation test at the moment.
-  std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(m_support, pStream, "SinCos",
+  std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "SinCos",
    [](LPCSTR Name, std::vector<BYTE> &Data) {
    // Initialize the SPrimitives buffer.
    VERIFY_IS_TRUE(0 == _stricmp(Name, "SPrimitives"));
@ -1933,20 +1944,111 @@ TEST_F(ExecutionTest, DoShaderOpArithTest) {
  }
 }

-TEST_F(ExecutionTest, OutOfBoundsTest) {
+// Returns `a` with a subnormal (denormal) value replaced by a zero of the same
+// sign; every other value, including NaN and infinities, is returned as-is.
+// The value is classified as a float on purpose: a float subnormal promoted
+// to double becomes a normal double, so a double-based check would never
+// trigger the flush.
+static float ifdenorm_flushf(float a) {
+  return fpclassify(a) == FP_SUBNORMAL ? copysign(0.0f, a) : a;
+}
+
+// True when `a` and `b` compare equal after each has been passed through
+// ifdenorm_flushf.
+static bool ifdenorm_flushf_eq(float a, float b) {
+  const float flushedA = ifdenorm_flushf(a);
+  const float flushedB = ifdenorm_flushf(b);
+  return flushedA == flushedB;
+}
+
+// Same comparison as a flushed equality check, except that two NaN values
+// are also accepted as a match (NaN never compares equal with ==).
+static bool ifdenorm_flushf_eq_or_nans(float a, float b) {
+  const bool bothNan = isnan(a) && isnan(b);
+  return bothNan || ifdenorm_flushf(a) == ifdenorm_flushf(b);
+}
+
+// Runs the "MinMax" shader op over a 10x10 grid of input pairs drawn from
+// TestValues and checks the min/max results written back by the shader.
+// NaN inputs must yield the other operand; for all other pairs the result is
+// compared against a CPU reference with denormals flushed on both sides.
+TEST_F(ExecutionTest, MinMaxTest) {
  WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
-  VERIFY_SUCCEEDED(m_support.Initialize());
-  CComPtr<IDxcCompiler> pCompiler;
-  CComPtr<IDxcLibrary> pLibrary;
-  CComPtr<IDxcBlobEncoding> pBlob;
  CComPtr<IStream> pStream;
-  std::wstring path = GetPathToHlslDataFile(L"ShaderOpArith.xml");
-  VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcLibrary, &pLibrary));
-  VERIFY_SUCCEEDED(pLibrary->CreateBlobFromFile(path.c_str(), nullptr, &pBlob));
-  VERIFY_SUCCEEDED(pLibrary->CreateStreamFromBlobReadOnly(pBlob, &pStream));
+  ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream);
+
+  // Mirrors the element layout declared by the MinMax shader.
+  struct SMinMaxElem {
+    float f_fa;
+    float f_fb;
+    float f_fmin_o;
+    float f_fmax_o;
+  };
+  // Infinities, ordinary values, both zeros, denormals of both signs and NaN.
+  float TestValues[] = {
+    -(INFINITY),
+    -1.0f,
+    -(FLT_MIN/2),
+    -0.0f,
+    0.0f,
+    FLT_MIN / 2,
+    1.0f,
+    INFINITY,
+    NAN
+  };

  // Single operation test at the moment.
-  std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(m_support, pStream, "OOB", nullptr);
+  CComPtr<ID3D12Device> pDevice;
+  if (!CreateDevice(&pDevice))
+    return;
+
+  std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "MinMax",
+    [&TestValues](LPCSTR Name, std::vector<BYTE> &Data) {
+    // Initialize the SPrimitives buffer.
+    VERIFY_IS_TRUE(0 == _stricmp(Name, "SPrimitives"));
+    size_t count = 10 * 10;
+    size_t size = sizeof(SMinMaxElem) * count;
+    Data.resize(size);
+    SMinMaxElem *pElems = (SMinMaxElem *)Data.data();
+    // Element (a, b) pairs TestValues[a] with TestValues[b]; the table has
+    // fewer than 10 entries, so the last index wraps back to the first value.
+    for (size_t a = 0; a < 10; ++a) {
+      float fa = TestValues[a % _countof(TestValues)];
+      for (size_t b = 0; b < 10; ++b) {
+        SMinMaxElem *p = &pElems[a * 10 + b];
+        ZeroMemory(p, sizeof(*p));
+        p->f_fa = fa;
+        p->f_fb = TestValues[b % _countof(TestValues)];
+      }
+    }
+  });
+  MappedData data;
+  test->Test->GetReadBackData("SPrimitives", &data);
+  // data.dump(); // Uncomment to dump raw bytes from buffer.
+
+  unsigned count = 10 * 10;
+  SMinMaxElem *pPrimitives = (SMinMaxElem *)data.data();
+  WEX::TestExecution::DisableVerifyExceptions dve;
+  for (unsigned i = 0; i < count; ++i) {
+    SMinMaxElem *p = &pPrimitives[i];
+    float fa = p->f_fa;
+    float fb = p->f_fb;
+    float fmin = p->f_fmin_o;
+    float fmax = p->f_fmax_o;
+    LogCommentFmt(L"Element #%u, a %f, b %f, min=%f, max=%f", i, fa, fb, fmin, fmax);
+    if (isnan(fa)) {
+      // One NaN operand: the other operand is the result (NaN if both are).
+      VERIFY_IS_TRUE(ifdenorm_flushf_eq_or_nans(fmin, fb));
+      VERIFY_IS_TRUE(ifdenorm_flushf_eq_or_nans(fmax, fb));
+    }
+    else if (isnan(fb)) {
+      VERIFY_IS_TRUE(ifdenorm_flushf_eq_or_nans(fmin, fa));
+      VERIFY_IS_TRUE(ifdenorm_flushf_eq_or_nans(fmax, fa));
+    }
+    else {
+      // Inputs are flushed before the comparison and the result may or may
+      // not be flushed, so the device may legitimately return either operand
+      // when both flush to zero. Compare with denormals flushed on both the
+      // observed and the reference value to accept every allowed outcome.
+      float fmax_0 = fa >= fb ? fa : fb;
+      VERIFY_IS_TRUE(ifdenorm_flushf_eq(fmax, fmax_0));
+      float fmin_0 = fa < fb ? fa : fb;
+      VERIFY_IS_TRUE(ifdenorm_flushf_eq(fmin, fmin_0));
+    }
+  }
+}
+
+TEST_F(ExecutionTest, OutOfBoundsTest) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  CComPtr<IStream> pStream;
+  ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream);
+
+  // Single operation test at the moment.
+  CComPtr<ID3D12Device> pDevice;
+  if (!CreateDevice(&pDevice))
+    return;
+
+  std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "OOB", nullptr);
  MappedData data;
  // Read back to CPU and examine contents - should get pure red.
  {
@ -1960,18 +2062,15 @@ TEST_F(ExecutionTest, OutOfBoundsTest) {

 TEST_F(ExecutionTest, SaturateTest) {
  WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
-  VERIFY_SUCCEEDED(m_support.Initialize());
-  CComPtr<IDxcCompiler> pCompiler;
-  CComPtr<IDxcLibrary> pLibrary;
-  CComPtr<IDxcBlobEncoding> pBlob;
  CComPtr<IStream> pStream;
-  std::wstring path = GetPathToHlslDataFile(L"ShaderOpArith.xml");
-  VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcLibrary, &pLibrary));
-  VERIFY_SUCCEEDED(pLibrary->CreateBlobFromFile(path.c_str(), nullptr, &pBlob));
-  VERIFY_SUCCEEDED(pLibrary->CreateStreamFromBlobReadOnly(pBlob, &pStream));
+  ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream);

  // Single operation test at the moment.
-  std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(m_support, pStream, "Saturate", nullptr);
+  CComPtr<ID3D12Device> pDevice;
+  if (!CreateDevice(&pDevice))
+    return;
+
+  std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "Saturate", nullptr);
  MappedData data;
  test->Test->GetReadBackData("U0", &data);
  const float *pValues = (float *)data.data();
@ -1989,18 +2088,15 @@ TEST_F(ExecutionTest, SaturateTest) {

 TEST_F(ExecutionTest, BasicTriangleOpTest) {
  WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
-  VERIFY_SUCCEEDED(m_support.Initialize());
-  CComPtr<IDxcCompiler> pCompiler;
-  CComPtr<IDxcLibrary> pLibrary;
-  CComPtr<IDxcBlobEncoding> pBlob;
  CComPtr<IStream> pStream;
-  std::wstring path = GetPathToHlslDataFile(L"ShaderOpArith.xml");
-  VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcLibrary, &pLibrary));
-  VERIFY_SUCCEEDED(pLibrary->CreateBlobFromFile(path.c_str(), nullptr, &pBlob));
-  VERIFY_SUCCEEDED(pLibrary->CreateStreamFromBlobReadOnly(pBlob, &pStream));
+  ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream);

  // Single operation test at the moment.
-  std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(m_support, pStream, "Triangle", nullptr);
+  CComPtr<ID3D12Device> pDevice;
+  if (!CreateDevice(&pDevice))
+    return;
+
+  std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "Triangle", nullptr);
  MappedData data;
  D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc;
  UINT width = (UINT64)D.Width;
--- a/tools/clang/unittests/HLSL/ShaderOpTest.cpp
+++ b/tools/clang/unittests/HLSL/ShaderOpTest.cpp
@ -931,6 +931,10 @@ void ShaderOpTest::SetRootValues(ID3D12GraphicsCommandList *pList,
  }
 }

+// Records pDevice in m_pDevice as the device associated with this test.
+void ShaderOpTest::SetDevice(ID3D12Device *pDevice) {
+  this->m_pDevice = pDevice;
+}
+
 void ShaderOpTest::SetDxcSupport(dxc::DxcDllSupport *pDxcSupport) {
  m_pDxcSupport = pDxcSupport;
 }
@ -942,7 +946,7 @@ void ShaderOpTest::SetInitCallback(TInitCallbackFn InitCallbackFn) {
 void ShaderOpTest::SetupRenderTarget(ShaderOp *pShaderOp, ID3D12Device *pDevice,
                                     ID3D12CommandQueue *pCommandQueue,
                                     ID3D12Resource *pRenderTarget) {
-  m_pDevice = pDevice;
+  SetDevice(pDevice);
  m_CommandList.Queue = pCommandQueue;
  // Simplification - add the render target name if missing, set it up 'by hand' if not.
  if (pShaderOp->RenderTargets.empty()) {
--- a/tools/clang/unittests/HLSL/ShaderOpTest.h
+++ b/tools/clang/unittests/HLSL/ShaderOpTest.h
@ -235,6 +235,7 @@ public:
  void GetReadBackData(LPCSTR pResourceName, MappedData *pData);
  void RunShaderOp(ShaderOp *pShaderOp);
  void RunShaderOp(std::shared_ptr<ShaderOp> pShaderOp);
+  void SetDevice(ID3D12Device* pDevice);
  void SetDxcSupport(dxc::DxcDllSupport *pDxcSupport);
  void SetInitCallback(TInitCallbackFn InitCallbackFn);
  void SetupRenderTarget(ShaderOp *pShaderOp, ID3D12Device *pDevice,
--- a/tools/dxexp/dxexp.cpp
+++ b/tools/dxexp/dxexp.cpp
@ -14,8 +14,15 @@
 #define WIN32_LEAN_AND_MEAN

 #include <Windows.h>
+#include <dxgi1_4.h>
+#include <d3d12.h>
+#include <atlbase.h>
 #include <stdio.h>

+#pragma comment(lib, "d3d12.lib")
+#pragma comment(lib, "dxgi.lib")
+#pragma comment(lib, "dxguid.lib")
+
 // A more recent Windows SDK than currently required is needed for these.
 typedef HRESULT (WINAPI *D3D12EnableExperimentalFeaturesFn)(
    UINT                                    NumFeatures,
@ -30,6 +37,49 @@ static const GUID D3D12ExperimentalShaderModelsID = { /* 76f5573e-f13a-40f5-b297
    { 0xb2, 0x97, 0x81, 0xce, 0x9e, 0x18, 0x93, 0x3f }
 };

+// Passes a successful HRESULT straight through; a failure code is handed to
+// AtlThrow instead.
+static HRESULT AtlCheck(HRESULT hr) {
+  if (SUCCEEDED(hr))
+    return hr;
+  AtlThrow(hr);
+  return hr;
+}
+
+// Maps a boolean to the display text "YES" or "NO".
+// The result points at a string literal, so it is returned as const char *:
+// literals are const arrays and must not be exposed through a mutable pointer.
+static const char *BoolToStr(bool value) {
+  return value ? "YES" : "NO";
+}
+
+// Prints one line per DXGI adapter stating whether a D3D12 device created on
+// it reports shader model 6, wave operation and 64-bit integer shader
+// operation support. An adapter on which no D3D12 device can be created is
+// reported and skipped, so it does not hide the adapters enumerated after it.
+// Any other failure ends the listing with a single diagnostic message.
+static void PrintAdapters() {
+  try {
+    CComPtr<IDXGIFactory2> pFactory;
+    AtlCheck(CreateDXGIFactory2(0, IID_PPV_ARGS(&pFactory)));
+    for (UINT AdapterIndex = 0;; ++AdapterIndex) {
+      CComPtr<IDXGIAdapter1> pAdapter;
+      HRESULT hrEnum = pFactory->EnumAdapters1(AdapterIndex, &pAdapter);
+      // DXGI_ERROR_NOT_FOUND marks the end of the adapter list.
+      if (hrEnum == DXGI_ERROR_NOT_FOUND)
+        break;
+      AtlCheck(hrEnum);
+      DXGI_ADAPTER_DESC1 AdapterDesc;
+      AtlCheck(pAdapter->GetDesc1(&AdapterDesc));
+      CComPtr<ID3D12Device> pDevice;
+      HRESULT hrDevice = D3D12CreateDevice(pAdapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&pDevice));
+      if (FAILED(hrDevice)) {
+        // Not every enumerated adapter can host a D3D12 device; keep going.
+        printf("%S - unable to create a D3D12 device (hr=0x%08x)\n",
+               AdapterDesc.Description, (unsigned)hrDevice);
+        continue;
+      }
+      D3D12_FEATURE_DATA_D3D12_OPTIONS1 DeviceOptions;
+      AtlCheck(pDevice->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS1, &DeviceOptions, sizeof(DeviceOptions)));
+      // The caller supplies the highest shader model it asks about; the
+      // field is read back after the query for the comparison below.
+      D3D12_FEATURE_DATA_SHADER_MODEL DeviceSM;
+      DeviceSM.HighestShaderModel = D3D_SHADER_MODEL_6_0;
+      AtlCheck(pDevice->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &DeviceSM, sizeof(DeviceSM)));
+      printf("%S - SM6 [%s] Wave [%s] I64 [%s]\n", AdapterDesc.Description,
+             BoolToStr(DeviceSM.HighestShaderModel >= D3D_SHADER_MODEL_6_0),
+             BoolToStr(DeviceOptions.WaveOps),
+             BoolToStr(DeviceOptions.Int64ShaderOps));
+    }
+  }
+  catch (ATL::CAtlException &) {
+    printf("%s", "Unable to print information for adapters.\n");
+  }
+}
+
+
 // Return codes:
 // 0 - experimental mode worked
 // 1 - cannot load d3d12.dll
@ -74,6 +124,7 @@ int main(int argc, const char *argv[]) {
  HRESULT hr = pD3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModelsID, nullptr, nullptr);
  if (SUCCEEDED(hr)) {
    printf("Experimental shader model feature succeeded.\n");
+    PrintAdapters();
    return 0;
  }
  else if (hr == E_NOINTERFACE) {
--- a/utils/hct/hctdb_inst_docs.txt
+++ b/utils/hct/hctdb_inst_docs.txt
@ -5,6 +5,11 @@
 # further remarks
 #
 # Keep these ordered alphabetically for ease of maintenance.
+#
+# To list the instructions that have no extra documentation, run this snippet:
+# import hctdb
+# h = hctdb.db_dxil()
+# for i in [item.name for item in h.instr if item.is_dxil_op and not item.remarks]: print(i)

 * Inst: Cos - returns cosine(theta) for theta in radians.

@ -25,6 +30,52 @@ The maximum absolute error is 0.0008 in the interval from -100*Pi to +100*Pi.
 The FAbs instruction takes simply forces the sign of the number(s) on the source operand positive, including on INF values.
 Applying FAbs on NaN preserves NaN, although the particular NaN bit pattern that results is not defined.

+* Inst: FMax - returns a if a >= b, else b
+
+>= is used instead of > so that if min(x,y) = x then max(x,y) = y.
+
+NaN has special handling: If one source operand is NaN, then the other source operand is returned.
+If both are NaN, any NaN representation is returned.
+This conforms to new IEEE 754R rules.
+
+Denorms are flushed (sign preserved) before the comparison; however, the result written to dest may or may not be denorm-flushed.
+
+------+-----------------------------+
+| a    | b                           |
+|      +------+--------+------+------+
+|      | -inf | F      | +inf | NaN  |
+------+------+--------+------+------+
+| -inf | -inf | b      | +inf | -inf |
+------+------+--------+------+------+
+| F    | a    | a or b | +inf | a    |
+------+------+--------+------+------+
+| +inf | +inf | +inf   | +inf | +inf |
+------+------+--------+------+------+
+| NaN  | -inf | b      | +inf | NaN  |
+------+------+--------+------+------+
+
+* Inst: FMin - returns a if a < b, else b
+
+NaN has special handling: If one source operand is NaN, then the other source operand is returned.
+If both are NaN, any NaN representation is returned.
+This conforms to new IEEE 754R rules.
+
+Denorms are flushed (sign preserved) before the comparison; however, the result written to dest may or may not be denorm-flushed.
+
+------+-----------------------------+
+| a    | b                           |
+|      +------+--------+------+------+
+|      | -inf | F      | +inf | NaN  |
+------+------+--------+------+------+
+| -inf | -inf | -inf   | -inf | -inf |
+------+------+--------+------+------+
+| F    | -inf | a or b | a    | a    |
+------+------+--------+------+------+
+| +inf | -inf | b      | +inf | +inf |
+------+------+--------+------+------+
+| NaN  | -inf | b      | +inf | NaN  |
+------+------+--------+------+------+
+
 * Inst: Saturate - clamps the result of a single or double precision floating point value to [0.0f...1.0f]

 The Saturate instruction performs the following operation on its input value:
--- a/utils/hct/hctversion.txt
+++ b/utils/hct/hctversion.txt
@ -1 +1 @@
-0.1
+0.7
 @ -1 +1 @@
 .1
 .7