Fix denorm preserve division with high value of denominator (#1093)

2018-03-01 07:41:10 -08:00 · 2018-03-01 07:41:10 -08:00 · 81ecfef34b
--- a/docs/DXIL.rst
+++ b/docs/DXIL.rst
@ -1980,6 +1980,67 @@ ExtractValue  extracts from aggregate
 ============= ======================================================================= =================


+FAdd
+~~~~
+
+%dest = fadd float %src0, %src1
+
+The following table shows the results obtained when executing the instruction with various classes of numbers, assuming that "fp32-denorm-mode"="preserve".
+For "fp32-denorm-mode"="ftz" mode, denorm inputs should be treated as the corresponding signed zero, and any resulting denorm is also flushed to zero.
+
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+|src0\\src1| -inf     | -F     | -denorm  | -0 | +0 | +denorm   |    +F  | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| -inf     | -inf     |   -inf | -inf     |-inf|-inf| -inf      |   -inf | NaN  | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| -F       | -inf     |   -F   | -F       |src0|src0| -F        |   +/-F | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| -denorm  | -inf     |   -F   |-F/denorm |src0|src0| +/-denorm |   +F   | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| -0       | -inf     |   src1 | src1     |-0  |+0  | src1      |   src1 | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| +0       | -inf     |   src1 | src1     |+0  |+0  | src1      |   src1 | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| +denorm  | -inf     |   -F   |+/-denorm |src0|src0| +F/denorm |   +F   | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| +F       | -inf     |  +/-F  | +F       |src0|src0| +F        |   +F   | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| +inf     | NaN      |   +inf | +inf     |+inf|+inf| +inf      |   +inf | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| NaN      | NaN      |   NaN  | NaN      |NaN |NaN | NaN       |   NaN  | NaN  | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+
+FDiv
+~~~~
+
+%dest = fdiv float %src0, %src1
+
+The following table shows the results obtained when executing the instruction with various classes of numbers, assuming that the fast math flag is not used and "fp32-denorm-mode"="preserve".
+When "fp32-denorm-mode"="ftz", denorm inputs should be interpreted as the corresponding signed zero, and any resulting denorm is also flushed to zero.
+When fast math is enabled, an implementation may use the reciprocal form: src0*(1/src1).  This may result in evaluating src0*(+/-)INF from src0*(1/(+/-)denorm), which may produce NaN in some cases or (+/-)INF in others.
+
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| src0\\src1| -inf     | -F     |  -1   | -denorm | -0 | +0 | +denorm |  +1   |    +F  | +inf | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| -inf      | NaN      |   +inf | +inf  | +inf    |+inf|-inf| -inf    |  -inf |   -inf | NaN  | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| -F        | +0       |   +F   | -src0 | +F      |+inf|-inf| -F      |  src0 |   -F   | -0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| -denorm   | +0       | +denorm| -src0 | +F      |+inf|-inf| -F      |  src0 |-denorm | -0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| -0        | +0       |   +0   | +0    | +0      |NaN |NaN | -0      |  -0   |   -0   | -0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| +0        | -0       |   -0   | -0    | -0      |NaN |NaN | +0      |  +0   |   +0   | +0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| +denorm   | -0       | -denorm| -src0 | -F      |-inf|+inf| +F      |  src0 |+denorm | +0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| +F        | -0       |   -F   | -src0 | -F      |-inf|+inf| +F      |  src0 |   +F   | +0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| +inf      | NaN      |   -inf | -inf  | -inf    |-inf|+inf| +inf    |  +inf |   +inf | NaN  | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| NaN       | NaN      |   NaN  | NaN   | NaN     |NaN |NaN | NaN     |  NaN  |   NaN  | NaN  | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+
 .. INSTR-RST:END

 Operations via external functions
--- a/tools/clang/unittests/HLSL/ShaderOpArithTable.xml
+++ b/tools/clang/unittests/HLSL/ShaderOpArithTable.xml
@ -5798,14 +5798,14 @@
            <Parameter Name="ShaderOp.Target">cs_6_2</Parameter>
            <Parameter Name="Validation.Input1">
                <Value>0x007F0000</Value>
-                <Value>0x007F0000</Value>
-                <Value>0x40000000</Value>
+                <Value>0x807F0000</Value>
+                <Value>0x20000000</Value>
                <Value>0x00800000</Value>
            </Parameter>
            <Parameter Name="Validation.Input2">
                <Value>1</Value>
-                <Value>0x007F0000</Value>
-                <Value>0x7F7F0000</Value>
+                <Value>4</Value>
+                <Value>0x607F0000</Value>
                <Value>0x40000000</Value>
            </Parameter>
            <Parameter Name="Validation.Expected1">
@ -5925,20 +5925,20 @@
            <Parameter Name="ShaderOp.Target">cs_6_2</Parameter>
            <Parameter Name="Validation.Input1">
                <Value>0x007F0000</Value>
-                <Value>0x007F0000</Value>
-                <Value>0x40000000</Value>
+                <Value>0x807F0000</Value>
+                <Value>0x20000000</Value>
                <Value>0x00800000</Value>
            </Parameter>
            <Parameter Name="Validation.Input2">
                <Value>1</Value>
-                <Value>0x007F0000</Value>
-                <Value>0x7F7F0000</Value>
+                <Value>4</Value>
+                <Value>0x607F0000</Value>
                <Value>0x40000000</Value>
            </Parameter>
            <Parameter Name="Validation.Expected1">
                <Value>0x007F0000</Value>
-                <Value>1</Value>
-                <Value>0x00404040</Value>
+                <Value>0x801FC000</Value>
+                <Value>0x00101010</Value>
                <Value>0x00400000</Value>
            </Parameter>
            <Parameter Name="Validation.Expected2">
@ -6045,20 +6045,20 @@
            <Parameter Name="ShaderOp.Target">cs_6_2</Parameter>
            <Parameter Name="Validation.Input1">
                <Value>0x007F0000</Value>
-                <Value>0x007F0000</Value>
-                <Value>0x40000000</Value>
+                <Value>0x807F0000</Value>
+                <Value>0x20000000</Value>
                <Value>0x00800000</Value>
            </Parameter>
            <Parameter Name="Validation.Input2">
                <Value>1</Value>
-                <Value>0x007F0000</Value>
-                <Value>0x7F7F0000</Value>
+                <Value>4</Value>
+                <Value>0x607F0000</Value>
                <Value>0x40000000</Value>
            </Parameter>
            <Parameter Name="Validation.Expected1">
                <Value>0x007F0000</Value>
-                <Value>1</Value>
-                <Value>0x00404040</Value>
+                <Value>0x801FC000</Value>
+                <Value>0x00101010</Value>
                <Value>0x00400000</Value>
            </Parameter>
            <Parameter Name="ShaderOp.Arguments">-denorm preserve</Parameter>
--- a/utils/hct/hctdb_inst_docs.txt
+++ b/utils/hct/hctdb_inst_docs.txt
@ -588,3 +588,62 @@ dest0, dest1 = USubb(src0, src1)
 * Inst: AttributeAtVertex - returns the values of the attributes at the vertex.

 returns the values of the attributes at the vertex. VertexID ranges from 0 to 2.
+
+* Inst: FDiv - returns the quotient of its two operands
+
+%dest = fdiv float %src0, %src1
+
+The following table shows the results obtained when executing the instruction with various classes of numbers, assuming that the fast math flag is not used and "fp32-denorm-mode"="preserve".
+When "fp32-denorm-mode"="ftz", denorm inputs should be interpreted as the corresponding signed zero, and any resulting denorm is also flushed to zero.
+When fast math is enabled, an implementation may use the reciprocal form: src0*(1/src1).  This may result in evaluating src0*(+/-)INF from src0*(1/(+/-)denorm), which may produce NaN in some cases or (+/-)INF in others.
+
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| src0\\src1| -inf     | -F     |  -1   | -denorm | -0 | +0 | +denorm |  +1   |    +F  | +inf | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| -inf      | NaN      |   +inf | +inf  | +inf    |+inf|-inf| -inf    |  -inf |   -inf | NaN  | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| -F        | +0       |   +F   | -src0 | +F      |+inf|-inf| -F      |  src0 |   -F   | -0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| -denorm   | +0       | +denorm| -src0 | +F      |+inf|-inf| -F      |  src0 |-denorm | -0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| -0        | +0       |   +0   | +0    | +0      |NaN |NaN | -0      |  -0   |   -0   | -0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| +0        | -0       |   -0   | -0    | -0      |NaN |NaN | +0      |  +0   |   +0   | +0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| +denorm   | -0       | -denorm| -src0 | -F      |-inf|+inf| +F      |  src0 |+denorm | +0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| +F        | -0       |   -F   | -src0 | -F      |-inf|+inf| +F      |  src0 |   +F   | +0   | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| +inf      | NaN      |   -inf | -inf  | -inf    |-inf|+inf| +inf    |  +inf |   +inf | NaN  | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+| NaN       | NaN      |   NaN  | NaN   | NaN     |NaN |NaN | NaN     |  NaN  |   NaN  | NaN  | NaN |
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
+
+* Inst: FAdd - component-wise add
+
+%dest = fadd float %src0, %src1
+
+The following table shows the results obtained when executing the instruction with various classes of numbers, assuming that "fp32-denorm-mode"="preserve".
+For "fp32-denorm-mode"="ftz" mode, denorm inputs should be treated as the corresponding signed zero, and any resulting denorm is also flushed to zero.
+
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+|src0\\src1| -inf     | -F     | -denorm  | -0 | +0 | +denorm   |    +F  | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| -inf     | -inf     |   -inf | -inf     |-inf|-inf| -inf      |   -inf | NaN  | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| -F       | -inf     |   -F   | -F       |src0|src0| -F        |   +/-F | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| -denorm  | -inf     |   -F   |-F/denorm |src0|src0| +/-denorm |   +F   | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| -0       | -inf     |   src1 | src1     |-0  |+0  | src1      |   src1 | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| +0       | -inf     |   src1 | src1     |+0  |+0  | src1      |   src1 | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| +denorm  | -inf     |   -F   |+/-denorm |src0|src0| +F/denorm |   +F   | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| +F       | -inf     |  +/-F  | +F       |src0|src0| +F        |   +F   | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| +inf     | NaN      |   +inf | +inf     |+inf|+inf| +inf      |   +inf | +inf | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
+| NaN      | NaN      |   NaN  | NaN      |NaN |NaN | NaN       |   NaN  | NaN  | NaN |
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
--- a/utils/hct/hctdb_test.py
+++ b/utils/hct/hctdb_test.py
@ -802,9 +802,9 @@ def add_test_cases():
    [['0x0', '0x00FE0000', '0x007F0000', '0x007A0000']],
    'cs_6_2', get_shader_text("binary float", "-"))
    add_test_case_denorm('FDivDenorm', ['FDiv'], 'ulp', 1,
-    [['0x007F0000', '0x007F0000', '0x40000000', '0x00800000'],['1', '0x007F0000', '0x7F7F0000', '0x40000000']],
+    [['0x007F0000', '0x807F0000', '0x20000000', '0x00800000'],['1', '4', '0x607F0000', '0x40000000']],
    [['0', 'NaN', '0', '0']],
-    [['0x007F0000', '1', '0x00404040', '0x00400000']],
+    [['0x007F0000', '0x801FC000', '0x00101010', '0x00400000']],
    'cs_6_2', get_shader_text("binary float", "/"))
    add_test_case_denorm('FMulDenorm', ['FMul'], 'ulp', 1,
    [['0x00000300', '0x007F0000', '0x007F0000', '0x001E0000', '0x00000300'],['128', '1', '0x007F0000', '20', '0x78000000']],