[ARM] Don't lower f16 interleaved accesses.

There are no vldN/vstN f16 variants, even with +fullfp16. We could use the i16 variants, but, in practice, even with +fullfp16, the f16 sequence leading to the i16 shuffle usually gets scalarized. We'd need to improve our support for f16 codegen before getting there. Reject f16 interleaved accesses. If we try to emit the f16 intrinsics, we'll just end up with a selection failure.

llvm-svn: 294818
author: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2017-02-11 01:53:00 +0000
committer: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2017-02-11 01:53:00 +0000
commit: fc979dc9ddf6a29977381a606846c762457d41de (patch)
tree: 400850dc54959f271cc2ac98acae5128513fbaaf /llvm/lib/Target
parent: f37fb89edcf21596acdc0d20925a394826ad1437 (diff)
download: bcm5719-llvm-fc979dc9ddf6a29977381a606846c762457d41de.tar.gz
bcm5719-llvm-fc979dc9ddf6a29977381a606846c762457d41de.zip
1 files changed, 14 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4ef58e1e178..6e674d96db3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13290,6 +13290,11 @@ bool ARMTargetLowering::lowerInterleavedLoad(
   if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
     return false;
 
+  // Skip if the vector has f16 elements: even though we could do an i16 vldN,
+  // we can't hold the f16 vectors and will end up converting via f32.
+  if (EltTy->isHalfTy())
+    return false;
+
   // A pointer vector can not be the return type of the ldN intrinsics. Need to
   // load integer vectors first and then convert to pointer vectors.
   if (EltTy->isPointerTy())
@@ -13307,6 +13312,8 @@ bool ARMTargetLowering::lowerInterleavedLoad(
   Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
   Ops.push_back(Builder.getInt32(LI->getAlignment()));
 
+  assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
+
   Type *Tys[] = { VecTy, Int8Ptr };
   Function *VldnFunc =
       Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
@@ -13380,6 +13387,11 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
       EltIs64Bits)
     return false;
 
+  // Skip if the vector has f16 elements: even though we could do an i16 vstN,
+  // we can't hold the f16 vectors and will end up converting via f32.
+  if (EltTy->isHalfTy())
+    return false;
+
   Value *Op0 = SVI->getOperand(0);
   Value *Op1 = SVI->getOperand(1);
   IRBuilder<> Builder(SI);
@@ -13406,6 +13418,8 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
   Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
   Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
 
+  assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
+
   Type *Tys[] = { Int8Ptr, SubVecTy };
   Function *VstNFunc = Intrinsic::getDeclaration(
       SI->getModule(), StoreInts[Factor - 2], Tys);
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>	2017-02-11 01:53:00 +0000
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>	2017-02-11 01:53:00 +0000
commit	fc979dc9ddf6a29977381a606846c762457d41de (patch)
tree	400850dc54959f271cc2ac98acae5128513fbaaf /llvm/lib/Target
parent	f37fb89edcf21596acdc0d20925a394826ad1437 (diff)
download	bcm5719-llvm-fc979dc9ddf6a29977381a606846c762457d41de.tar.gz bcm5719-llvm-fc979dc9ddf6a29977381a606846c762457d41de.zip