summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2011-09-08 18:05:08 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2011-09-08 18:05:08 +0000
commitfb113a005155e1178e4eda3f28aff263ea38acdb (patch)
tree5512a63a38980b6dc5348fa0120ec5287e9c3edc
parentea8d803bb088a35f84cb29e2b827776178214356 (diff)
downloadbcm5719-llvm-fb113a005155e1178e4eda3f28aff263ea38acdb.tar.gz
bcm5719-llvm-fb113a005155e1178e4eda3f28aff263ea38acdb.zip
Add AVX versions of blend vector operations and fix some issues noticed
in Nadav's r139285 and r139287 commits. 1) Rename vsel.ll to a more descriptive name 2) Change the order of BLEND operands to "Op1, Op2, Cond", this is necessary because PBLENDVB is already used in different places with this order, and it was being emitted in the wrong way for vselect 3) Add AVX patterns and tests for the same SSE41 instructions llvm-svn: 139305
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp2
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td4
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td31
-rw-r--r--llvm/test/CodeGen/X86/avx-blend.ll47
-rw-r--r--llvm/test/CodeGen/X86/sse41-blend.ll (renamed from llvm/test/CodeGen/X86/vsel.ll)0
5 files changed, 69 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d74a87281c0..cf90490703e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8697,7 +8697,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Op2 = Op.getOperand(2);
DebugLoc DL = Op.getDebugLoc();
- SDValue Ops[] = {Cond, Op1, Op2};
+ SDValue Ops[] = {Op1, Op2, Cond};
assert(Op1.getValueType().isVector() && "Op1 must be a vector");
assert(Op2.getValueType().isVector() && "Op2 must be a vector");
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 7ad9c87d28f..c2db9177cde 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -61,10 +61,10 @@ def X86psignd : SDNode<"X86ISD::PSIGND",
def X86pblendvb : SDNode<"X86ISD::PBLENDVB",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
-def X86blendvpd : SDNode<"X86ISD::BLENDVPD",
+def X86blendvpd : SDNode<"X86ISD::BLENDVPD",
SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
-def X86blendvps : SDNode<"X86ISD::BLENDVPS",
+def X86blendvps : SDNode<"X86ISD::BLENDVPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 9148c76ce0c..6bcba728ae3 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5853,9 +5853,14 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
memopv32i8, int_x86_avx_blendv_ps_256>;
-def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$src3),
- (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$src3)>,
- Requires<[HasAVX]>;
+let Predicates = [HasAVX] in {
+ def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$mask),
+ (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+ def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, VR128:$mask),
+ (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+ def : Pat<(X86blendvps VR128:$src1, VR128:$src2, VR128:$mask),
+ (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+}
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
@@ -5877,16 +5882,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
}
}
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
-def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
- (PBLENDVBrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
-def : Pat<(X86blendvpd XMM0, VR128:$src1, VR128:$src2),
- (BLENDVPDrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
-def : Pat<(X86blendvps XMM0, VR128:$src1, VR128:$src2),
- (BLENDVPSrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
+let Predicates = [HasSSE41] in {
+ def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
+ (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, XMM0),
+ (BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86blendvps VR128:$src1, VR128:$src2, XMM0),
+ (BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
+}
let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
diff --git a/llvm/test/CodeGen/X86/avx-blend.ll b/llvm/test/CodeGen/X86/avx-blend.ll
new file mode 100644
index 00000000000..68289ad1231
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx-blend.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mattr=+avx -march=x86 | FileCheck %s
+
+;CHECK: vsel_float
+;CHECK: vblendvps
+;CHECK: ret
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+ %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+ ret <4 x float> %vsel
+}
+
+
+;CHECK: vsel_i32
+;CHECK: vblendvps
+;CHECK: ret
+define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
+ %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %v1, <4 x i32> %v2
+ ret <4 x i32> %vsel
+}
+
+
+;CHECK: vsel_double
+;CHECK: vblendvpd
+;CHECK: ret
+define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
+ %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
+ ret <2 x double> %vsel
+}
+
+
+;CHECK: vsel_i64
+;CHECK: vblendvpd
+;CHECK: ret
+define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
+ %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
+ ret <2 x i64> %vsel
+}
+
+
+;CHECK: vsel_i8
+;CHECK: vpblendvb
+;CHECK: ret
+define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
+ %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
+ ret <16 x i8> %vsel
+}
+
+
diff --git a/llvm/test/CodeGen/X86/vsel.ll b/llvm/test/CodeGen/X86/sse41-blend.ll
index 3c854ac6066..3c854ac6066 100644
--- a/llvm/test/CodeGen/X86/vsel.ll
+++ b/llvm/test/CodeGen/X86/sse41-blend.ll
OpenPOWER on IntegriCloud