summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-02-28 06:19:55 +0000
committerCraig Topper <craig.topper@intel.com>2018-02-28 06:19:55 +0000
commitac799b05d42fee69a91bf35beb7f87c548827a42 (patch)
tree661c48f18385b72d7270652dbc2e0f0a8601c1fe /llvm/lib
parentc844524e46555737fd0870e78fcfcb93925ad723 (diff)
downloadbcm5719-llvm-ac799b05d42fee69a91bf35beb7f87c548827a42.tar.gz
bcm5719-llvm-ac799b05d42fee69a91bf35beb7f87c548827a42.zip
[X86] Change the masked FPCLASS implementation to use AND instead of OR to combine the mask results.
While the description for the instruction does mention OR, its talking about how the individual classification test results are ORed together. The incoming mask is used as a zeroing write mask. If the bit is 1 the classification is written to the output. The bit is 0 the output is 0. This equivalent to an AND. Here is pseudocode from the intrinsics guide FOR j := 0 to 1 i := j*64 IF k1[j] k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0]) ELSE k[j] := 0 FI ENDFOR k[MAX:2] := 0 llvm-svn: 326306
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp8
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td10
2 files changed, 8 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5449ef788d8..9225881a01f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19911,9 +19911,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
case X86ISD::CMPM_RND:
case X86ISD::CMPMU:
case X86ISD::VPSHUFBITQMB:
- return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
case X86ISD::VFPCLASS:
- return DAG.getNode(ISD::OR, dl, VT, Op, VMask);
+ return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
case ISD::TRUNCATE:
case X86ISD::VTRUNC:
case X86ISD::VTRUNCS:
@@ -19951,10 +19950,9 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);
if (Op.getOpcode() == X86ISD::FSETCCM ||
- Op.getOpcode() == X86ISD::FSETCCM_RND)
+ Op.getOpcode() == X86ISD::FSETCCM_RND ||
+ Op.getOpcode() == X86ISD::VFPCLASSS)
return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
- if (Op.getOpcode() == X86ISD::VFPCLASSS)
- return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 9fc9a7dcb72..e8ebbb33509 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2601,7 +2601,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
- [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ [(set _.KRC:$dst,(and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], itins.rr>,
EVEX_K, Sched<[itins.Sched]>;
@@ -2617,7 +2617,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
- [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ [(set _.KRC:$dst,(and _.KRCWM:$mask,
(OpNode _.ScalarIntMemCPat:$src1,
(i32 imm:$src2))))], itins.rm>,
EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
@@ -2641,7 +2641,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
- [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ [(set _.KRC:$dst,(and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], itins.rr>,
EVEX_K, Sched<[itins.Sched]>;
@@ -2657,7 +2657,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
- [(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
+ [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], itins.rm>,
EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
@@ -2676,7 +2676,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
_.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
_.BroadcastStr##", $src2}",
- [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
+ [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))))], itins.rm>,
OpenPOWER on IntegriCloud