Fix denorm preserve division with high value of denominator (#1093)
This commit is contained in:
Родитель
c8970cdf6b
Коммит
81ecfef34b
|
@ -1980,6 +1980,67 @@ ExtractValue extracts from aggregate
|
|||
============= ======================================================================= =================
|
||||
|
||||
|
||||
FAdd
|
||||
~~~~
|
||||
|
||||
%dest = fadd float %src0, %src1
|
||||
|
||||
The following table shows the results obtained when executing the instruction with various classes of numbers, assuming that "fp32-denorm-mode"="preserve".
|
||||
When "fp32-denorm-mode"="ftz", denorm inputs should be treated as the corresponding signed zero, and any resulting denorm is also flushed to zero.
|
||||
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
|src0\\src1| -inf | -F | -denorm | -0 | +0 | +denorm | +F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| -inf | -inf | -inf | -inf |-inf|-inf| -inf | -inf | NaN | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| -F | -inf | -F | -F |src0|src0| -F | +/-F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| -denorm | -inf | -F |-F/denorm |src0|src0| +/-denorm | +F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| -0 | -inf | src1 | src1 |-0 |+0 | src1 | src1 | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| +0 | -inf | src1 | src1 |+0 |+0 | src1 | src1 | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| +denorm | -inf | -F |+/-denorm |src0|src0| +F/denorm | +F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| +F | -inf | +/-F | +F |src0|src0| +F | +F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| +inf | NaN | +inf | +inf |+inf|+inf| +inf | +inf | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| NaN | NaN | NaN | NaN |NaN |NaN | NaN | NaN | NaN | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
|
||||
FDiv
|
||||
~~~~
|
||||
|
||||
%dest = fdiv float %src0, %src1
|
||||
|
||||
The following table shows the results obtained when executing the instruction with various classes of numbers, assuming that the fast math flag is not used and "fp32-denorm-mode"="preserve".
|
||||
When "fp32-denorm-mode"="ftz", denorm inputs should be interpreted as corresponding signed zero, and any resulting denorm is also flushed to zero.
|
||||
When fast math is enabled, an implementation may use the reciprocal form: src0*(1/src1). This may result in evaluating src0*(+/-)INF from src0*(1/(+/-)denorm), which may produce NaN in some cases and (+/-)INF in others.
|
||||
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| src0\\src1| -inf | -F | -1 | -denorm | -0 | +0 | +denorm | +1 | +F | +inf | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| -inf | NaN | +inf | +inf | +inf |+inf|-inf| -inf | -inf | -inf | NaN | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| -F | +0 | +F | -src0 | +F |+inf|-inf| -F | src0 | -F | -0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| -denorm | +0 | +denorm| -src0 | +F |+inf|-inf| -F | src0 |-denorm | -0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| -0 | +0 | +0 | +0 | 0 |NaN |NaN | 0 | -0 | -0 | -0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| +0 | -0 | -0 | -0 | 0 |NaN |NaN | 0 | +0 | +0 | +0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| +denorm | -0 | -denorm| -src0 | -F |-inf|+inf| +F | src0 |+denorm | +0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| +F | -0 | -F | -src0 | -F |-inf|+inf| +F | src0 | +F | +0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| +inf | NaN | -inf | -inf | -inf |-inf|+inf| +inf | +inf | +inf | NaN | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| NaN | NaN | NaN | NaN | NaN |NaN |NaN | NaN | NaN | NaN | NaN | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
|
||||
.. INSTR-RST:END
|
||||
|
||||
Operations via external functions
|
||||
|
|
|
@ -5798,14 +5798,14 @@
|
|||
<Parameter Name="ShaderOp.Target">cs_6_2</Parameter>
|
||||
<Parameter Name="Validation.Input1">
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>0x40000000</Value>
|
||||
<Value>0x807F0000</Value>
|
||||
<Value>0x20000000</Value>
|
||||
<Value>0x00800000</Value>
|
||||
</Parameter>
|
||||
<Parameter Name="Validation.Input2">
|
||||
<Value>1</Value>
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>0x7F7F0000</Value>
|
||||
<Value>4</Value>
|
||||
<Value>0x607F0000</Value>
|
||||
<Value>0x40000000</Value>
|
||||
</Parameter>
|
||||
<Parameter Name="Validation.Expected1">
|
||||
|
@ -5925,20 +5925,20 @@
|
|||
<Parameter Name="ShaderOp.Target">cs_6_2</Parameter>
|
||||
<Parameter Name="Validation.Input1">
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>0x40000000</Value>
|
||||
<Value>0x807F0000</Value>
|
||||
<Value>0x20000000</Value>
|
||||
<Value>0x00800000</Value>
|
||||
</Parameter>
|
||||
<Parameter Name="Validation.Input2">
|
||||
<Value>1</Value>
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>0x7F7F0000</Value>
|
||||
<Value>4</Value>
|
||||
<Value>0x607F0000</Value>
|
||||
<Value>0x40000000</Value>
|
||||
</Parameter>
|
||||
<Parameter Name="Validation.Expected1">
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>1</Value>
|
||||
<Value>0x00404040</Value>
|
||||
<Value>0x801FC000</Value>
|
||||
<Value>0x00101010</Value>
|
||||
<Value>0x00400000</Value>
|
||||
</Parameter>
|
||||
<Parameter Name="Validation.Expected2">
|
||||
|
@ -6045,20 +6045,20 @@
|
|||
<Parameter Name="ShaderOp.Target">cs_6_2</Parameter>
|
||||
<Parameter Name="Validation.Input1">
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>0x40000000</Value>
|
||||
<Value>0x807F0000</Value>
|
||||
<Value>0x20000000</Value>
|
||||
<Value>0x00800000</Value>
|
||||
</Parameter>
|
||||
<Parameter Name="Validation.Input2">
|
||||
<Value>1</Value>
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>0x7F7F0000</Value>
|
||||
<Value>4</Value>
|
||||
<Value>0x607F0000</Value>
|
||||
<Value>0x40000000</Value>
|
||||
</Parameter>
|
||||
<Parameter Name="Validation.Expected1">
|
||||
<Value>0x007F0000</Value>
|
||||
<Value>1</Value>
|
||||
<Value>0x00404040</Value>
|
||||
<Value>0x801FC000</Value>
|
||||
<Value>0x00101010</Value>
|
||||
<Value>0x00400000</Value>
|
||||
</Parameter>
|
||||
<Parameter Name="ShaderOp.Arguments">-denorm preserve</Parameter>
|
||||
|
|
|
@ -588,3 +588,62 @@ dest0, dest1 = USubb(src0, src1)
|
|||
* Inst: AttributeAtVertex - returns the values of the attributes at the vertex.
|
||||
|
||||
returns the values of the attributes at the vertex. VertexID ranges from 0 to 2.
|
||||
|
||||
* Inst: FDiv - returns the quotient of its two operands
|
||||
|
||||
%dest = fdiv float %src0, %src1
|
||||
|
||||
The following table shows the results obtained when executing the instruction with various classes of numbers, assuming that the fast math flag is not used and "fp32-denorm-mode"="preserve".
|
||||
When "fp32-denorm-mode"="ftz", denorm inputs should be interpreted as corresponding signed zero, and any resulting denorm is also flushed to zero.
|
||||
When fast math is enabled, an implementation may use the reciprocal form: src0*(1/src1). This may result in evaluating src0*(+/-)INF from src0*(1/(+/-)denorm), which may produce NaN in some cases and (+/-)INF in others.
|
||||
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| src0\\src1| -inf | -F | -1 | -denorm | -0 | +0 | +denorm | +1 | +F | +inf | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| -inf | NaN | +inf | +inf | +inf |+inf|-inf| -inf | -inf | -inf | NaN | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| -F | +0 | +F | -src0 | +F |+inf|-inf| -F | src0 | -F | -0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| -denorm | +0 | +denorm| -src0 | +F |+inf|-inf| -F | src0 |-denorm | -0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| -0 | +0 | +0 | +0 | 0 |NaN |NaN | 0 | -0 | -0 | -0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| +0 | -0 | -0 | -0 | 0 |NaN |NaN | 0 | +0 | +0 | +0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| +denorm | -0 | -denorm| -src0 | -F |-inf|+inf| +F | src0 |+denorm | +0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| +F | -0 | -F | -src0 | -F |-inf|+inf| +F | src0 | +F | +0 | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| +inf | NaN | -inf | -inf | -inf |-inf|+inf| +inf | +inf | +inf | NaN | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
| NaN | NaN | NaN | NaN | NaN |NaN |NaN | NaN | NaN | NaN | NaN | NaN |
|
||||
+-----------+----------+--------+-------+---------+----+----+---------+-------+--------+------+-----+
|
||||
|
||||
* Inst: FAdd - component-wise add
|
||||
|
||||
%dest = fadd float %src0, %src1
|
||||
|
||||
The following table shows the results obtained when executing the instruction with various classes of numbers, assuming that "fp32-denorm-mode"="preserve".
|
||||
When "fp32-denorm-mode"="ftz", denorm inputs should be treated as the corresponding signed zero, and any resulting denorm is also flushed to zero.
|
||||
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
|src0\\src1| -inf | -F | -denorm | -0 | +0 | +denorm | +F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| -inf | -inf | -inf | -inf |-inf|-inf| -inf | -inf | NaN | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| -F | -inf | -F | -F |src0|src0| -F | +/-F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| -denorm | -inf | -F |-F/denorm |src0|src0| +/-denorm | +F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| -0 | -inf | src1 | src1 |-0 |+0 | src1 | src1 | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| +0 | -inf | src1 | src1 |+0 |+0 | src1 | src1 | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| +denorm | -inf | -F |+/-denorm |src0|src0| +F/denorm | +F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| +F | -inf | +/-F | +F |src0|src0| +F | +F | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| +inf | NaN | +inf | +inf |+inf|+inf| +inf | +inf | +inf | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
||||
| NaN | NaN | NaN | NaN |NaN |NaN | NaN | NaN | NaN | NaN |
|
||||
+----------+----------+--------+----------+----+----+-----------+--------+------+-----+
|
|
@ -802,9 +802,9 @@ def add_test_cases():
|
|||
[['0x0', '0x00FE0000', '0x007F0000', '0x007A0000']],
|
||||
'cs_6_2', get_shader_text("binary float", "-"))
|
||||
add_test_case_denorm('FDivDenorm', ['FDiv'], 'ulp', 1,
|
||||
[['0x007F0000', '0x007F0000', '0x40000000', '0x00800000'],['1', '0x007F0000', '0x7F7F0000', '0x40000000']],
|
||||
[['0x007F0000', '0x807F0000', '0x20000000', '0x00800000'],['1', '4', '0x607F0000', '0x40000000']],
|
||||
[['0', 'NaN', '0', '0']],
|
||||
[['0x007F0000', '1', '0x00404040', '0x00400000']],
|
||||
[['0x007F0000', '0x801FC000', '0x00101010', '0x00400000']],
|
||||
'cs_6_2', get_shader_text("binary float", "/"))
|
||||
add_test_case_denorm('FMulDenorm', ['FMul'], 'ulp', 1,
|
||||
[['0x00000300', '0x007F0000', '0x007F0000', '0x001E0000', '0x00000300'],['128', '1', '0x007F0000', '20', '0x78000000']],
|
||||
|
|
Загрузка…
Ссылка в новой задаче