summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2019-06-10 00:41:07 +0000
committerCraig Topper <craig.topper@intel.com>2019-06-10 00:41:07 +0000
commitf7ba8b808a89d5e050deb7dbd77004fa8e0dff9b (patch)
tree51b045badd2ad9e909b3bf0a49c784fc713b5fa9 /llvm/lib
parent80fee25776c2fb61e74c1ecb1a523375c2500b69 (diff)
downloadbcm5719-llvm-f7ba8b808a89d5e050deb7dbd77004fa8e0dff9b.tar.gz
bcm5719-llvm-f7ba8b808a89d5e050deb7dbd77004fa8e0dff9b.zip
[X86] Convert f32/f64 FANDN/FAND/FOR/FXOR to vector logic ops and scalar_to_vector/extract_vector_elts to reduce isel patterns.
Previously we did the equivalent operation in isel patterns with COPY_TO_REGCLASS operations to transition. By inserting scalar_to_vectors and extract_vector_elts before isel we can allow each piece to be selected individually and accomplish the same final result. Ideally we'd use vector operations earlier in lowering/combine, but that looks to be more difficult. The scalar-fp-to-i64.ll changes are because we have a pattern for using movlpd for store+extract_vector_elt, while an f64 store uses movsd. The encoding sizes are the same. llvm-svn: 362914
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp43
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td45
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td93
3 files changed, 43 insertions, 138 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1a8592e6264..107af0ca3c9 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -841,6 +841,49 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
CurDAG->DeleteNode(N);
continue;
}
+ case X86ISD::FANDN:
+ case X86ISD::FAND:
+ case X86ISD::FOR:
+ case X86ISD::FXOR: {
+ // Widen scalar fp logic ops to vector to reduce isel patterns.
+ // FIXME: Can we do this during lowering/combine.
+ MVT VT = N->getSimpleValueType(0);
+ if (VT.isVector() || VT == MVT::f128)
+ break;
+
+ MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
+ SDLoc dl(N);
+ SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
+ N->getOperand(0));
+ SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
+ N->getOperand(1));
+
+ SDValue Res;
+ if (Subtarget->hasSSE2()) {
+ EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
+ Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
+ Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
+ case X86ISD::FAND: Opc = ISD::AND; break;
+ case X86ISD::FOR: Opc = ISD::OR; break;
+ case X86ISD::FXOR: Opc = ISD::XOR; break;
+ }
+ Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
+ Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
+ } else {
+ Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
+ }
+ Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
+ CurDAG->getIntPtrConstant(0, dl));
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
}
if (OptLevel != CodeGenOpt::None &&
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 4ca4fb7b122..fbadd80b242 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -5657,51 +5657,6 @@ defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
-let Predicates = [HasVLX,HasDQI] in {
- // Use packed logical operations for scalar ops.
- def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
- (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
- FR64X)>;
- def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
- (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
- FR64X)>;
- def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
- (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
- FR64X)>;
- def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
- (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
- FR64X)>;
-
- def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
- FR32X)>;
- def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
- FR32X)>;
- def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
- FR32X)>;
- def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
- FR32X)>;
-}
-
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 27bcc2dc901..ade5645d5bf 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2417,99 +2417,6 @@ let Predicates = [HasAVX1Only] in {
(VANDNPSYrm VR256:$src1, addr:$src2)>;
}
-let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
- // Use packed logical operations for scalar ops.
- def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VANDPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VXORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VANDNPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
-
- def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VANDPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VXORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VANDNPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
-}
-
-let Predicates = [UseSSE1] in {
- // Use packed logical operations for scalar ops.
- def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (ANDPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (ORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (XORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (ANDNPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
-}
-
-let Predicates = [UseSSE2] in {
- // Use packed logical operations for scalar ops.
- def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (ANDPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (ORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (XORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (ANDNPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
-}
-
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
(VPANDrr VR128:$src1, VR128:$src2)>;
OpenPOWER on IntegriCloud