diff options
author | Jeroen Ketema <j.ketema@imperial.ac.uk> | 2015-09-30 10:56:37 +0000 |
---|---|---|
committer | Jeroen Ketema <j.ketema@imperial.ac.uk> | 2015-09-30 10:56:37 +0000 |
commit | ab99b59e8ca28f5366fb95b497e64ae44d67a9ca (patch) | |
tree | a9b8c1d1af1f1c409aa1e51dcfd9ea96fdf314f1 /llvm/lib | |
parent | 42e651fa43747743a6161b1c53e59c743d52a71c (diff) | |
download | bcm5719-llvm-ab99b59e8ca28f5366fb95b497e64ae44d67a9ca.tar.gz bcm5719-llvm-ab99b59e8ca28f5366fb95b497e64ae44d67a9ca.zip |
[ARM][NEON] Use address space in vld([1234]|[234]lane) and vst([1234]|[234]lane) instructions
This commit changes the interface of the vld[1234], vld[234]lane, and vst[1234],
vst[234]lane ARM neon intrinsics and associates an address space with the
pointer that these intrinsics take. This changes, e.g.,
<2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
to
<2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8*, i32)
This change ensures that address spaces are fully taken into account in the ARM
target during lowering of interleaved loads and stores.
Differential Revision: http://reviews.llvm.org/D12985
llvm-svn: 248887
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 55 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 13 |
2 files changed, 62 insertions, 6 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 22ff7d39b7b..6c9fdd6fcea 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Regex.h" #include <cstring> using namespace llvm; @@ -92,8 +93,41 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { F->arg_begin()->getType()); return true; } + Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); + if (vldRegex.match(Name)) { + auto fArgs = F->getFunctionType()->params(); + SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); + // Can't use Intrinsic::getDeclaration here as the return types might + // then only be structurally equal. + FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); + NewFn = Function::Create(fType, F->getLinkage(), + "llvm." + Name + ".p0i8", F->getParent()); + return true; + } + Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); + if (vstRegex.match(Name)) { + static Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, + Intrinsic::arm_neon_vst2, + Intrinsic::arm_neon_vst3, + Intrinsic::arm_neon_vst4}; + + static Intrinsic::ID StoreLaneInts[] = {Intrinsic::arm_neon_vst2lane, + Intrinsic::arm_neon_vst3lane, + Intrinsic::arm_neon_vst4lane}; + + auto fArgs = F->getFunctionType()->params(); + Type *Tys[] = {fArgs[0], fArgs[1]}; + if (Name.find("lane") == StringRef::npos) + NewFn = Intrinsic::getDeclaration(F->getParent(), + StoreInts[fArgs.size() - 3], Tys); + else + NewFn = Intrinsic::getDeclaration(F->getParent(), + StoreLaneInts[fArgs.size() - 5], Tys); + return true; + } break; } + case 'c': { if (Name.startswith("ctlz.") && F->arg_size() == 1) { F->setName(Name + ".old"); @@ -651,6 +685,27 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { default: llvm_unreachable("Unknown function for CallInst upgrade."); + case Intrinsic::arm_neon_vld1: + case Intrinsic::arm_neon_vld2: + case Intrinsic::arm_neon_vld3: + case Intrinsic::arm_neon_vld4: + case Intrinsic::arm_neon_vld2lane: + case Intrinsic::arm_neon_vld3lane: + case Intrinsic::arm_neon_vld4lane: + case Intrinsic::arm_neon_vst1: + case Intrinsic::arm_neon_vst2: + case Intrinsic::arm_neon_vst3: + case Intrinsic::arm_neon_vst4: + case Intrinsic::arm_neon_vst2lane: + case Intrinsic::arm_neon_vst3lane: + case Intrinsic::arm_neon_vst4lane: { + SmallVector<Value *, 4> Args(CI->arg_operands().begin(), + CI->arg_operands().end()); + CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args)); + CI->eraseFromParent(); + return; + } + case Intrinsic::ctlz: case Intrinsic::cttz: assert(CI->getNumArgOperands() == 1 && diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 14defa4e781..063fbffb2bb 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -11802,9 +11802,6 @@ bool ARMTargetLowering::lowerInterleavedLoad( Intrinsic::arm_neon_vld3, Intrinsic::arm_neon_vld4}; - Function *VldnFunc = - Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy); - IRBuilder<> Builder(LI); SmallVector<Value *, 2> Ops; @@ -11812,6 +11809,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr)); Ops.push_back(Builder.getInt32(LI->getAlignment())); + Type *Tys[] = { VecTy, Int8Ptr }; + Function *VldnFunc = + Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); // Replace uses of each shufflevector with the corresponding vector loaded @@ -11903,14 +11903,15 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4}; - Function *VstNFunc = Intrinsic::getDeclaration( - SI->getModule(), StoreInts[Factor - 2], SubVecTy); - SmallVector<Value *, 6> Ops; Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr)); + Type *Tys[] = { Int8Ptr, SubVecTy }; + Function *VstNFunc = Intrinsic::getDeclaration( + SI->getModule(), StoreInts[Factor - 2], Tys); + // Split the shufflevector operands into sub vectors for the new vstN call. for (unsigned i = 0; i < Factor; i++) Ops.push_back(Builder.CreateShuffleVector( |