Diffstat (limited to 'llvm/lib')
 llvm/lib/Target/X86/X86FastISel.cpp          |  16
 llvm/lib/Target/X86/X86InstrFragmentsSIMD.td |   9
 llvm/lib/Target/X86/X86InstrInfo.cpp         |   8
 llvm/lib/Target/X86/X86InstrSSE.td           | 106
 4 files changed, 83 insertions(+), 56 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 2c6e5ec0933..c890fdd1e51 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2155,8 +2155,8 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
// Choose the SSE instruction sequence based on data type (float or double).
static const uint16_t OpcTable[2][4] = {
- { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
- { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
+ { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
+ { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
};
const uint16_t *Opc = nullptr;
@@ -2236,14 +2236,18 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
} else {
+ const TargetRegisterClass *VR128 = &X86::VR128RegClass;
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
RHSReg, RHSIsKill);
- ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
}
updateValueMap(I, ResultReg);
return true;
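
Editor's note: the FastISel change above drops the Fs* pseudo opcodes and instead emits the plain packed instructions on VR128, copying the final result back into the scalar register class. A minimal plain-C++ sketch (a hypothetical helper, not code from this patch) of the blend-by-mask idiom that the CMPSS/ANDPS/ANDNPS/ORPS sequence implements:

```cpp
#include <cstdint>
#include <cstring>

// select(cond, t, f) via an all-ones/all-zeros mask, mirroring the
// sequence emitted above:
//   mask = cmp(lhs, rhs)             (CMPSS: lane is all-ones or all-zeros)
//   res  = (mask & t) | (~mask & f)  (ANDPS, ANDNPS, ORPS)
static float selectViaMask(bool cond, float t, float f) {
  uint32_t mask = cond ? 0xFFFFFFFFu : 0u; // what CMPSS leaves in lane 0
  uint32_t tb, fb;
  std::memcpy(&tb, &t, sizeof tb);
  std::memcpy(&fb, &f, sizeof fb);
  uint32_t rb = (mask & tb) | (~mask & fb); // ANDPS / ANDNPS / ORPS
  float r;
  std::memcpy(&r, &rb, sizeof r);
  return r;
}
```
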
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 71273493f53..f261ad62ee2 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -624,15 +624,6 @@ def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
-// These are needed to match a scalar load that is used in a vector-only
-// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
-// The memory operand is required to be a 128-bit load, so it must be converted
-// from a vector to a scalar.
-def loadf32_128 : PatFrag<(ops node:$ptr),
- (f32 (extractelt (loadv4f32 node:$ptr), (iPTR 0)))>;
-def loadf64_128 : PatFrag<(ops node:$ptr),
- (f64 (extractelt (loadv2f64 node:$ptr), (iPTR 0)))>;
-
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
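
Editor's note: the deleted loadf32_128/loadf64_128 fragments matched a scalar extracted from lane 0 of a full 128-bit load, which is what the memory forms of the removed Fs* instructions required. A hypothetical intrinsics illustration (not code from this patch) of the DAG shape these fragments matched:

```cpp
#include <xmmintrin.h>

// Roughly the shape loadf32_128 matched:
//   (f32 (extractelt (loadv4f32 p), (iPTR 0)))
// i.e. a full 16-byte load whose lane 0 feeds a scalar FP logical op.
static float lane0OfVectorLoad(const float *p) {
  __m128 v = _mm_loadu_ps(p); // loadv4f32: 128-bit load
  return _mm_cvtss_f32(v);    // extractelt ..., 0
}
```
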
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 6f05ad31f9d..07e9455ea4d 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8365,16 +8365,12 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
- { X86::FsANDNPSrr, X86::FsANDNPDrr,X86::PANDNrr },
{ X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
{ X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
- { X86::FsANDPSrr, X86::FsANDPDrr, X86::PANDrr },
{ X86::ORPSrm, X86::ORPDrm, X86::PORrm },
{ X86::ORPSrr, X86::ORPDrr, X86::PORrr },
- { X86::FsORPSrr, X86::FsORPDrr, X86::PORrr },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
- { X86::FsXORPSrr, X86::FsXORPDrr, X86::PXORrr },
// AVX 128-bit support
{ X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
{ X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
@@ -8385,16 +8381,12 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
- { X86::VFsANDNPSrr,X86::VFsANDNPDrr,X86::VPANDNrr },
{ X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
{ X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
- { X86::VFsANDPSrr, X86::VFsANDPDrr, X86::VPANDrr },
{ X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
{ X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
- { X86::VFsORPSrr, X86::VFsORPDrr, X86::VPORrr },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
- { X86::VFsXORPSrr, X86::VFsXORPDrr, X86::VPXORrr },
// AVX 256-bit support
{ X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
{ X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
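
Editor's note: the rows deleted above belong to ReplaceableInstrs, the three-column table (PackedSingle / PackedDouble / PackedInt spellings of one operation) that the execution-domain fixup pass searches; with the Fs* opcodes gone, their rows are no longer needed. A simplified sketch of how such a table is typically searched (modeled loosely on the lookup helper in this file; names here are illustrative):

```cpp
#include <cstddef>
#include <cstdint>

// Find the row whose column for `Domain` holds `Opcode`. Domains are
// encoded 1 = PackedSingle, 2 = PackedDouble, 3 = PackedInt, so the
// column index is Domain - 1. The caller then reads the column for the
// execution domain it wants to move the instruction into.
static const uint16_t *lookupReplacement(const uint16_t (*Table)[3],
                                         size_t NumRows, uint16_t Opcode,
                                         unsigned Domain) {
  for (size_t I = 0; I != NumRows; ++I)
    if (Table[I][Domain - 1] == Opcode)
      return Table[I];
  return nullptr; // opcode has no equivalent in another domain
}
```
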
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 10c4f8b3a88..78fc83781e1 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2777,39 +2777,6 @@ defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//
-// Multiclass for scalars using the X86 logical operation aliases for FP.
-multiclass sse12_fp_packed_scalar_logical_alias<
- bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
- defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, loadf32_128, SSEPackedSingle, itins, 0>,
- PS, VEX_4V;
-
- defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, loadf64_128, SSEPackedDouble, itins, 0>,
- PD, VEX_4V;
-
- let Constraints = "$src1 = $dst" in {
- defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
- f32, f128mem, memopfsf32_128, SSEPackedSingle, itins>, PS;
-
- defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
- f64, f128mem, memopfsf64_128, SSEPackedDouble, itins>, PD;
- }
-}
-
-let isCodeGenOnly = 1 in {
- defm FsAND : sse12_fp_packed_scalar_logical_alias<0x54, "and", X86fand,
- SSE_BIT_ITINS_P>;
- defm FsOR : sse12_fp_packed_scalar_logical_alias<0x56, "or", X86for,
- SSE_BIT_ITINS_P>;
- defm FsXOR : sse12_fp_packed_scalar_logical_alias<0x57, "xor", X86fxor,
- SSE_BIT_ITINS_P>;
-
- let isCommutable = 0 in
- defm FsANDN : sse12_fp_packed_scalar_logical_alias<0x55, "andn", X86fandn,
- SSE_BIT_ITINS_P>;
-}
-
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
@@ -2965,6 +2932,43 @@ let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
(VANDNPDYrm VR256:$src1, addr:$src2)>;
}
+let Predicates = [HasAVX] in {
+ // Use packed logical operations for scalar ops.
+ def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VANDPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VXORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VANDNPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+
+ def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VANDPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VXORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VANDNPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+}
+
let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
(ANDPSrr VR128:$src1, VR128:$src2)>;
@@ -2983,6 +2987,24 @@ let Predicates = [UseSSE1] in {
(XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
(ANDNPSrm VR128:$src1, addr:$src2)>;
+
+ // Use packed logical operations for scalar ops.
+ def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (ANDPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (ORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (XORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (ANDNPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
}
let Predicates = [UseSSE2] in {
@@ -3003,6 +3025,24 @@ let Predicates = [UseSSE2] in {
(XORPDrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fandn VR128:$src1, (memopv2f64 addr:$src2)),
(ANDNPDrm VR128:$src1, addr:$src2)>;
+
+ // Use packed logical operations for scalar ops.
+ def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (ANDPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (ORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (XORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (ANDNPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
}
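
Editor's note: the COPY_TO_REGCLASS patterns added in this file all follow one shape: move the scalar into an XMM register, run the full-width packed logical instruction (the upper lanes hold don't-care bits), and read only lane 0 back. A hedged intrinsics sketch (illustrative, not code from this patch) of the same idea for the f32 AND case:

```cpp
#include <xmmintrin.h>

// Scalar f32 bitwise AND via packed ANDPS (VANDPS under AVX), the same
// lowering the patterns above select: only lane 0 of the result is
// meaningful, so garbage in the upper lanes is harmless.
static float scalarFand(float a, float b) {
  __m128 va = _mm_set_ss(a);      // a in lane 0, zeros above
  __m128 vb = _mm_set_ss(b);
  __m128 vr = _mm_and_ps(va, vb); // full-width ANDPS
  return _mm_cvtss_f32(vr);       // read lane 0 only
}
```
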
//===----------------------------------------------------------------------===//