summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-09-15 23:00:59 +0000
committerCraig Topper <craig.topper@intel.com>2017-09-15 23:00:59 +0000
commit8cd7b0cd2cece3876e2d305ab185ec2931a5ac95 (patch)
treedc0557c3237e85e85f22009f0032c52172066904 /clang/lib/CodeGen
parent51d8f887db26815576af25c24c5c70b1b03b6830 (diff)
downloadbcm5719-llvm-8cd7b0cd2cece3876e2d305ab185ec2931a5ac95.tar.gz
bcm5719-llvm-8cd7b0cd2cece3876e2d305ab185ec2931a5ac95.zip
[X86] Use native shuffle vector for the perm2f128 intrinsics
This patch replaces the perm2f128 intrinsics with native shuffle vectors. This uses a pretty simple approach to allocate source 0 to the lower half input and source 1 to the upper half input. Then its just a matter of filling in the indices to use either the lower or upper half of that specific source. This can result in the same source being used by both operands. InstCombine or SelectionDAGBuilder should be able to clean that up. Differential Revision: https://reviews.llvm.org/D37892 llvm-svn: 313418
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp39
1 files changed, 39 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4240d2de536..3e353ea7eca 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7923,6 +7923,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86Select(*this, Ops[4], Align, Ops[3]);
}
+ case X86::BI__builtin_ia32_vperm2f128_pd256:
+ case X86::BI__builtin_ia32_vperm2f128_ps256:
+ case X86::BI__builtin_ia32_vperm2f128_si256:
+ case X86::BI__builtin_ia32_permti256: {
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+
+ // This takes a very simple approach since there are two lanes and a
+ // shuffle can have 2 inputs. So we reserve the first input for the first
+ // lane and the second input for the second lane. This may result in
+ // duplicate sources, but this can be dealt with in the backend.
+
+ Value *OutOps[2];
+ uint32_t Indices[8];
+ for (unsigned l = 0; l != 2; ++l) {
+ // Determine the source for this lane.
+ if (Imm & (1 << ((l * 4) + 3)))
+ OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
+ else if (Imm & (1 << ((l * 4) + 1)))
+ OutOps[l] = Ops[1];
+ else
+ OutOps[l] = Ops[0];
+
+ for (unsigned i = 0; i != NumElts/2; ++i) {
+ // Start with ith element of the source for this lane.
+ unsigned Idx = (l * NumElts) + i;
+ // If bit 0 of the immediate half is set, switch to the high half of
+ // the source.
+ if (Imm & (1 << (l * 4)))
+ Idx += NumElts/2;
+ Indices[(l * (NumElts/2)) + i] = Idx;
+ }
+ }
+
+ return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
+ makeArrayRef(Indices, NumElts),
+ "vperm");
+ }
+
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64:
case X86::BI__builtin_ia32_movntsd:
OpenPOWER on IntegriCloud