summary refs log tree commit diff stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 8
-rw-r--r-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td 2
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 91
-rw-r--r-- llvm/test/CodeGen/X86/avx-256-logic.ll 45
4 files changed, 82 insertions, 64 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f953bf2bc7f..e06d84c54a0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11821,10 +11821,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- // Want to form ANDNP nodes, in the hopes of then easily combining them with
- // OR and AND nodes to form PBLEND/PSIGN.
+ // Want to form ANDNP nodes:
+ // 1) In the hopes of then easily combining them with OR and AND nodes
+ // to form PBLEND/PSIGN.
+ // 2) To match ANDN packed intrinsics
EVT VT = N->getValueType(0);
- if (VT != MVT::v2i64)
+ if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
SDValue N0 = N->getOperand(0);
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 67a5a345be8..e35a6751929 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -47,7 +47,7 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
- SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86psignb : SDNode<"X86ISD::PSIGNB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 72b383000af..e6167c73d30 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1473,98 +1473,68 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode, int HasPat = 0,
- list<list<dag>> Pattern = []> {
+ SDNode OpNode> {
let Pattern = []<dag> in {
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- !if(HasPat, Pattern[0], // rr
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
- VR128:$src2)))]),
- !if(HasPat, Pattern[2], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]), 0>,
- VEX_4V;
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- !if(HasPat, Pattern[1], // rr
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64
- VR128:$src2))))]),
- !if(HasPat, Pattern[3], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]), 0>,
- OpSize, VEX_4V;
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- !if(HasPat, Pattern[0], // rr
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
- VR128:$src2)))]),
- !if(HasPat, Pattern[2], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>, TB;
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB;
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- !if(HasPat, Pattern[1], // rr
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64
- VR128:$src2))))]),
- !if(HasPat, Pattern[3], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>,
- TB, OpSize;
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB, OpSize;
}
}
/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
///
multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode, int HasNoPat = 0> {
+ SDNode OpNode> {
defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem,
- !if(HasNoPat, []<dag>, // rr
- [(set VR256:$dst, (v4i64 (OpNode VR256:$src1,
- VR256:$src2)))]),
- !if(HasNoPat, []<dag>, // rm
- [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))]), 0>, VEX_4V;
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, VEX_4V;
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
- !if(HasNoPat, []<dag>, // rr
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (bc_v4i64 (v4f64 VR256:$src2))))]),
- !if(HasNoPat, []<dag>, // rm
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (memopv4i64 addr:$src2)))]), 0>,
- OpSize, VEX_4V;
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
}
// AVX 256-bit packed logical ops forms
-defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
-defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
-defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
-let isCommutable = 0 in {
- defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", undef /* dummy */, 1>;
-}
+defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
+defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
+defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
+defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
- defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
- // single r+r
- [(set VR128:$dst, (X86andnp VR128:$src1, VR128:$src2))],
- // double r+r
- [],
- // single r+m
- [(set VR128:$dst, (X86andnp VR128:$src1, (memopv2i64 addr:$src2)))],
- // double r+m
- []]>;
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
@@ -3678,6 +3648,7 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
}
// Move scalar to XMM zero-extended
diff --git a/llvm/test/CodeGen/X86/avx-256-logic.ll b/llvm/test/CodeGen/X86/avx-256-logic.ll
index 05e82895fec..d9e5d081fb1 100644
--- a/llvm/test/CodeGen/X86/avx-256-logic.ll
+++ b/llvm/test/CodeGen/X86/avx-256-logic.ll
@@ -114,3 +114,48 @@ entry:
ret <8 x float> %1
}
+; CHECK: vandnpd
+define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <4 x double> %x to <4 x i64>
+ %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %1 = bitcast <4 x double> %y to <4 x i64>
+ %and.i = and <4 x i64> %1, %neg.i
+ %2 = bitcast <4 x i64> %and.i to <4 x double>
+ ret <4 x double> %2
+}
+
+; CHECK: vandnpd (%
+define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+ %tmp2 = load <4 x double>* %x, align 32
+ %0 = bitcast <4 x double> %y to <4 x i64>
+ %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %1 = bitcast <4 x double> %tmp2 to <4 x i64>
+ %and.i = and <4 x i64> %1, %neg.i
+ %2 = bitcast <4 x i64> %and.i to <4 x double>
+ ret <4 x double> %2
+}
+
+; CHECK: vandnps
+define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <8 x float> %x to <8 x i32>
+ %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = bitcast <8 x float> %y to <8 x i32>
+ %and.i = and <8 x i32> %1, %neg.i
+ %2 = bitcast <8 x i32> %and.i to <8 x float>
+ ret <8 x float> %2
+}
+
+; CHECK: vandnps (%
+define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+ %tmp2 = load <8 x float>* %x, align 32
+ %0 = bitcast <8 x float> %y to <8 x i32>
+ %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = bitcast <8 x float> %tmp2 to <8 x i32>
+ %and.i = and <8 x i32> %1, %neg.i
+ %2 = bitcast <8 x i32> %and.i to <8 x float>
+ ret <8 x float> %2
+}
OpenPOWER on IntegriCloud