author    Simon Pilgrim <llvm-dev@redking.me.uk>  2015-11-03 20:27:01 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>  2015-11-03 20:27:01 +0000
commit    e88dc04c4854124c6cf4c2d0fc554029ea190846 (patch)
tree      58d9c047e0b7c6f1414b269108364704a1b89962 /llvm
parent    f4acad30ec08c6fd66d63fa09f300c55c4afd90b (diff)
[X86][XOP] Add support for the matching of the VPCMOV bit select instruction
XOP has the VPCMOV instruction that performs the common vector bit select operation:

    OR( AND( SRC1, SRC3 ), AND( SRC2, ~SRC3 ) )

This patch adds tablegen pattern matching for this instruction.

Differential Revision: http://reviews.llvm.org/D8841

llvm-svn: 251975
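For illustration (not part of the commit; the function name is hypothetical), here is a minimal IR sketch of the and/not/and/or sequence the new patterns recognize. Each result bit comes from %a where the mask bit is 1 and from %b where it is 0; with -mattr=+xop this now selects to a single vpcmov:

define <2 x i64> @bitselect_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %m) {
  %t0 = and <2 x i64> %a, %m                    ; bits of %a selected by %m
  %notm = xor <2 x i64> %m, <i64 -1, i64 -1>    ; ~%m
  %t1 = and <2 x i64> %b, %notm                 ; bits of %b selected by ~%m
  %r = or <2 x i64> %t0, %t1
  ret <2 x i64> %r
}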
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp                      11
-rw-r--r--  llvm/lib/Target/X86/X86InstrXOP.td               10
-rw-r--r--  llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll    5
-rw-r--r--  llvm/test/CodeGen/X86/xop-pcmov.ll              162
4 files changed, 185 insertions, 3 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 9de149559df..12c354c89b2 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -200,6 +200,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx2.pblendd.128" ||
Name == "x86.avx2.pblendd.256" ||
Name == "x86.avx2.vbroadcasti128" ||
+ Name == "x86.xop.vpcmov" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
@@ -457,6 +458,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep =
Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
Builder.getInt8(Imm)});
+ } else if (Name == "llvm.x86.xop.vpcmov") {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+ Value *Sel = CI->getArgOperand(2);
+ unsigned NumElts = CI->getType()->getVectorNumElements();
+ Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
+ Value *NotSel = Builder.CreateXor(Sel, MinusOne);
+ Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
+ Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
+ Rep = Builder.CreateOr(Sel0, Sel1);
} else if (Name == "llvm.x86.sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
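The upgrade hook above rewrites calls to the now-retired 128-bit intrinsic into generic IR, which the new tablegen patterns then re-select back to vpcmov. A sketch of the transformation, with hypothetical value names:

; before: a call to the auto-upgraded intrinsic
%r = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a, <2 x i64> %b, <2 x i64> %m)

; after: the same value computed with generic IR
%notm = xor <2 x i64> %m, <i64 -1, i64 -1>
%s0 = and <2 x i64> %a, %m
%s1 = and <2 x i64> %b, %notm
%r = or <2 x i64> %s0, %s1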
diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td
index df9d906f4d9..4cb2304e464 100644
--- a/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/llvm/lib/Target/X86/X86InstrXOP.td
@@ -281,6 +281,16 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
let ExeDomain = SSEPackedInt in
defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
+let Predicates = [HasXOP] in {
+ def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+
+ def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+}
+
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
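A note on the patterns added above: X86andnp inverts its first operand, i.e. X86andnp(a, b) = AND(NOT(a), b), so each pattern computes OR(AND(SRC3, SRC1), AND(~SRC3, SRC2)) with $src3 as the selector mask, matching the operation described in the commit message. Patterns are only written for v2i64 and v4i64; judging from the test coverage below, the other integer element widths and the floating-point cases reach instruction selection as logic ops on these canonical types, though that reading is an inference, not something stated in the commit.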
diff --git a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
index e96fed36d15..3b4c6ea1210 100644
--- a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -61,15 +61,14 @@ define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float>
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
- ; CHECK: vpcmov
+ ; CHECK: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
%res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
- ; CHECK: vpcmov
- ; CHECK: ymm
+ ; CHECK: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
%res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
ret <4 x i64> %res
}
diff --git a/llvm/test/CodeGen/X86/xop-pcmov.ll b/llvm/test/CodeGen/X86/xop-pcmov.ll
new file mode 100644
index 00000000000..165d4a7232d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/xop-pcmov.ll
@@ -0,0 +1,162 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s
+
+define <4 x double> @pcmov_4f64(<4 x double> %a, <4 x double> %b, <4 x double> %m) {
+; CHECK-LABEL: pcmov_4f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = bitcast <4 x double> %m to <4 x i64>
+ %2 = bitcast <4 x double> %a to <4 x i64>
+ %3 = and <4 x i64> %1, %2
+ %4 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %5 = bitcast <4 x double> %b to <4 x i64>
+ %6 = and <4 x i64> %4, %5
+ %7 = or <4 x i64> %3, %6
+ %8 = bitcast <4 x i64> %7 to <4 x double>
+ ret <4 x double> %8
+}
+
+define <2 x double> @pcmov_2f64(<2 x double> %a, <2 x double> %b, <2 x double> %m) {
+; CHECK-LABEL: pcmov_2f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = bitcast <2 x double> %m to <2 x i64>
+ %2 = bitcast <2 x double> %a to <2 x i64>
+ %3 = and <2 x i64> %1, %2
+ %4 = xor <2 x i64> %1, <i64 -1, i64 -1>
+ %5 = bitcast <2 x double> %b to <2 x i64>
+ %6 = and <2 x i64> %4, %5
+ %7 = or <2 x i64> %3, %6
+ %8 = bitcast <2 x i64> %7 to <2 x double>
+ ret <2 x double> %8
+}
+
+define <8 x float> @pcmov_8f32(<8 x float> %a, <8 x float> %b, <8 x float> %m) {
+; CHECK-LABEL: pcmov_8f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = bitcast <8 x float> %m to <8 x i32>
+ %2 = bitcast <8 x float> %a to <8 x i32>
+ %3 = and <8 x i32> %1, %2
+ %4 = xor <8 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %5 = bitcast <8 x float> %b to <8 x i32>
+ %6 = and <8 x i32> %4, %5
+ %7 = or <8 x i32> %3, %6
+ %8 = bitcast <8 x i32> %7 to <8 x float>
+ ret <8 x float> %8
+}
+
+define <4 x float> @pcmov_4f32(<4 x float> %a, <4 x float> %b, <4 x float> %m) {
+; CHECK-LABEL: pcmov_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = bitcast <4 x float> %m to <4 x i32>
+ %2 = bitcast <4 x float> %a to <4 x i32>
+ %3 = and <4 x i32> %1, %2
+ %4 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %5 = bitcast <4 x float> %b to <4 x i32>
+ %6 = and <4 x i32> %4, %5
+ %7 = or <4 x i32> %3, %6
+ %8 = bitcast <4 x i32> %7 to <4 x float>
+ ret <4 x float> %8
+}
+
+define <4 x i64> @pcmov_4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %m) {
+; CHECK-LABEL: pcmov_4i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = and <4 x i64> %a, %m
+ %2 = xor <4 x i64> %m, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %3 = and <4 x i64> %b, %2
+ %4 = or <4 x i64> %1, %3
+ ret <4 x i64> %4
+}
+
+define <2 x i64> @pcmov_2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %m) {
+; CHECK-LABEL: pcmov_2i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = and <2 x i64> %a, %m
+ %2 = xor <2 x i64> %m, <i64 -1, i64 -1>
+ %3 = and <2 x i64> %b, %2
+ %4 = or <2 x i64> %1, %3
+ ret <2 x i64> %4
+}
+
+define <8 x i32> @pcmov_8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %m) {
+; CHECK-LABEL: pcmov_8i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = and <8 x i32> %a, %m
+ %2 = xor <8 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %3 = and <8 x i32> %b, %2
+ %4 = or <8 x i32> %1, %3
+ ret <8 x i32> %4
+}
+
+define <4 x i32> @pcmov_4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %m) {
+; CHECK-LABEL: pcmov_4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = and <4 x i32> %a, %m
+ %2 = xor <4 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %3 = and <4 x i32> %b, %2
+ %4 = or <4 x i32> %1, %3
+ ret <4 x i32> %4
+}
+
+define <16 x i16> @pcmov_16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %m) {
+; CHECK-LABEL: pcmov_16i16:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = and <16 x i16> %a, %m
+ %2 = xor <16 x i16> %m, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %3 = and <16 x i16> %b, %2
+ %4 = or <16 x i16> %1, %3
+ ret <16 x i16> %4
+}
+
+define <8 x i16> @pcmov_8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %m) {
+; CHECK-LABEL: pcmov_8i16:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = and <8 x i16> %a, %m
+ %2 = xor <8 x i16> %m, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %3 = and <8 x i16> %b, %2
+ %4 = or <8 x i16> %1, %3
+ ret <8 x i16> %4
+}
+
+define <32 x i8> @pcmov_32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %m) {
+; CHECK-LABEL: pcmov_32i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = and <32 x i8> %a, %m
+ %2 = xor <32 x i8> %m, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %3 = and <32 x i8> %b, %2
+ %4 = or <32 x i8> %1, %3
+ ret <32 x i8> %4
+}
+
+define <16 x i8> @pcmov_16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %m) {
+; CHECK-LABEL: pcmov_16i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = and <16 x i8> %a, %m
+ %2 = xor <16 x i8> %m, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %3 = and <16 x i8> %b, %2
+ %4 = or <16 x i8> %1, %3
+ ret <16 x i8> %4
+}