summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGBuiltin.cpp
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-06-07 17:28:03 +0000
committerCraig Topper <craig.topper@intel.com>2018-06-07 17:28:03 +0000
commitd3623155a26fa01079612dd654d69f94f751e302 (patch)
treee84cd3ba5ba2f07a86ce97062865b168d42b4e9e /clang/lib/CodeGen/CGBuiltin.cpp
parent6887aa8adcc82974de0a3a3b2b76e5497e4f4e55 (diff)
downloadbcm5719-llvm-d3623155a26fa01079612dd654d69f94f751e302.tar.gz
bcm5719-llvm-d3623155a26fa01079612dd654d69f94f751e302.zip
[X86] Add back builtins for _mm_slli_si128/_mm_srli_si128 and similar intrinsics.
We still lower them to native shuffle IR, but we do it in CGBuiltin.cpp now. This allows us to check the target feature and ensure the immediate fits in 8 bits. This also improves our -O0 codegen slightly because we're able to see the zeroinitializer in the shuffle. It looks like it got lost behind a store+load previously. llvm-svn: 334208
Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp62
1 files changed, 62 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a086b4d926c..a126e75b19f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9262,6 +9262,68 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
"vperm");
}
+ case X86::BI__builtin_ia32_pslldqi128:
+ case X86::BI__builtin_ia32_pslldqi256:
+ case X86::BI__builtin_ia32_pslldqi512: {
+ // Shift value is in bits so divide by 8.
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
+ llvm::Type *ResultType = Ops[0]->getType();
+ // Builtin type is vXi64 so multiply by 8 to get bytes.
+ unsigned NumElts = ResultType->getVectorNumElements() * 8;
+
+ // If pslldq is shifting the vector more than 15 bytes, emit zero.
+ if (ShiftVal >= 16)
+ return llvm::Constant::getNullValue(ResultType);
+
+ uint32_t Indices[64];
+ // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
+ for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned i = 0; i != 16; ++i) {
+ unsigned Idx = NumElts + i - ShiftVal;
+ if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
+ Indices[l + i] = Idx + l;
+ }
+ }
+
+ llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
+ Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+ Value *Zero = llvm::Constant::getNullValue(VecTy);
+ Value *SV = Builder.CreateShuffleVector(Zero, Cast,
+ makeArrayRef(Indices, NumElts),
+ "pslldq");
+ return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
+ }
+ case X86::BI__builtin_ia32_psrldqi128:
+ case X86::BI__builtin_ia32_psrldqi256:
+ case X86::BI__builtin_ia32_psrldqi512: {
+ // Shift value is in bits so divide by 8.
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
+ llvm::Type *ResultType = Ops[0]->getType();
+ // Builtin type is vXi64 so multiply by 8 to get bytes.
+ unsigned NumElts = ResultType->getVectorNumElements() * 8;
+
+ // If psrldq is shifting the vector more than 15 bytes, emit zero.
+ if (ShiftVal >= 16)
+ return llvm::Constant::getNullValue(ResultType);
+
+ uint32_t Indices[64];
+ // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
+ for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned i = 0; i != 16; ++i) {
+ unsigned Idx = i + ShiftVal;
+ if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
+ Indices[l + i] = Idx + l;
+ }
+ }
+
+ llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
+ Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+ Value *Zero = llvm::Constant::getNullValue(VecTy);
+ Value *SV = Builder.CreateShuffleVector(Cast, Zero,
+ makeArrayRef(Indices, NumElts),
+ "psrldq");
+ return Builder.CreateBitCast(SV, ResultType, "cast");
+ }
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64:
case X86::BI__builtin_ia32_movntsd:
OpenPOWER on IntegriCloud