summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/ARM/ARMISelLowering.cpp
diff options
context:
space:
mode:
authorBill Wendling <isanbard@gmail.com>2011-03-14 23:02:38 +0000
committerBill Wendling <isanbard@gmail.com>2011-03-14 23:02:38 +0000
commite1fd78f2bcb843c95e2da2c91bf82c995081692b (patch)
treee104616a4cf6d16511552028a2a4304a36850dbf /llvm/lib/Target/ARM/ARMISelLowering.cpp
parente2eb8076016f3cc30082d815469532b5ff22bb5d (diff)
downloadbcm5719-llvm-e1fd78f2bcb843c95e2da2c91bf82c995081692b.tar.gz
bcm5719-llvm-e1fd78f2bcb843c95e2da2c91bf82c995081692b.zip
Generate a VTBL instruction instead of a series of loads and stores when we
can. As Nate pointed out, VTBL isn't super performant, but it *has* to be better than this: _shuf: @ BB#0: @ %entry push {r4, r7, lr} add r7, sp, #4 sub sp, #12 mov r4, sp bic r4, r4, #7 mov sp, r4 mov r2, sp vmov d16, r0, r1 orr r0, r2, #6 orr r3, r2, #7 vst1.8 {d16[0]}, [r3] vst1.8 {d16[5]}, [r0] subs r4, r7, #4 orr r0, r2, #5 vst1.8 {d16[4]}, [r0] orr r0, r2, #4 vst1.8 {d16[4]}, [r0] orr r0, r2, #3 vst1.8 {d16[0]}, [r0] orr r0, r2, #2 vst1.8 {d16[2]}, [r0] orr r0, r2, #1 vst1.8 {d16[1]}, [r0] vst1.8 {d16[3]}, [r2] vldr.64 d16, [sp] vmov r0, r1, d16 mov sp, r4 pop {r4, r7, pc} The "illegal" testcase in vext.ll is no longer illegal. <rdar://problem/9078775> llvm-svn: 127630
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp35
1 files changed, 34 insertions, 1 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 3a340511855..665c8233168 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -852,6 +852,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VTBL1: return "ARMISD::VTBL1";
+ case ARMISD::VTBL2: return "ARMISD::VTBL2";
+ case ARMISD::VTBL3: return "ARMISD::VTBL3";
+ case ARMISD::VTBL4: return "ARMISD::VTBL4";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
@@ -4055,6 +4059,29 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
}
}
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+ SmallVectorImpl<int> &ShuffleMask,
+ SelectionDAG &DAG) {
+ // Check to see if we can use the VTBL instruction.
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> VTBLMask;
+ for (SmallVectorImpl<int>::iterator
+ I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+ VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
+
+ if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+ else
+ return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+}
+
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -4172,6 +4199,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ if (VT == MVT::v8i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
return SDValue();
}
@@ -4534,7 +4567,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalAddress:
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
OpenPOWER on IntegriCloud