-rw-r--r--  llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp |  12
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp  |  55
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.cpp  |  26
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.h    |   3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td   | 116
-rw-r--r--  llvm/test/CodeGen/Hexagon/select-instr-align.ll  |  31
6 files changed, 187 insertions, 56 deletions
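
For orientation before the diff: the HexagonFrameLowering hunk below re-marks every spill slot as 8-byte aligned but still rounds its frame offset up to the slot's original alignment (clamped to at least 8), so HVX vector spills can end up with only 8-byte guaranteed alignment and then need the unaligned vmemu forms selected later in the patch. The following standalone C++ sketch is an illustration only, with made-up slot sizes/alignments and a simplified alignTo; none of it is code from the patch.

// Illustration only -- mirrors the LFS/alignTo loop added to
// processFunctionBeforeFrameFinalized(), with hypothetical spill slots.
#include <algorithm>
#include <cstdio>

// Simplified stand-in for llvm::alignTo (power-of-two alignments only).
static unsigned alignTo(unsigned Value, unsigned Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  const unsigned Size[]  = {8, 64, 4};  // hypothetical spill-slot sizes
  const unsigned Align[] = {8, 64, 4};  // their original alignments
  unsigned LFS = 0;                     // accumulated local frame size
  for (int i = 0; i != 3; ++i) {
    // The object itself is re-marked as align 8, but its offset keeps the
    // original alignment (never less than 8), as in the patch.
    unsigned A = std::max(Align[i], 8u);
    LFS = alignTo(LFS + Size[i], A);
    std::printf("slot %d -> frame offset %d\n", i, -int(LFS));
  }
  std::printf("local frame size = %u\n", LFS);
  return 0;
}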
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 81c85c4aa21..ac91169a7b5 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1086,14 +1086,16 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
     return;

   unsigned LFS = MFI->getLocalFrameSize();
-  int Offset = -LFS;
   for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
     if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i))
       continue;
-    int S = MFI->getObjectSize(i);
-    LFS += S;
-    Offset -= S;
-    MFI->mapLocalFrameObject(i, Offset);
+    unsigned S = MFI->getObjectSize(i);
+    // Reduce the alignment to at most 8. This will require unaligned vector
+    // stores if they happen here.
+    unsigned A = std::max(MFI->getObjectAlignment(i), 8U);
+    MFI->setObjectAlignment(i, 8);
+    LFS = alignTo(LFS+S, A);
+    MFI->mapLocalFrameObject(i, -LFS);
   }

   MFI->setLocalFrameSize(LFS);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index b0e04cf128e..6e36c2fd81b 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -189,6 +189,7 @@ public:

 private:
   bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
+  bool isAlignedMemNode(const MemSDNode *N) const;

 }; // end HexagonDAGToDAGISel
 } // end anonymous namespace
@@ -414,20 +415,24 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
   } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 ||
              LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) {
     HasVecOffset = true;
-    if (HII->isValidAutoIncImm(LoadedVT, Val)) {
-      Opcode = Hexagon::V6_vL32b_pi;
-    }
+    bool Aligned = isAlignedMemNode(LD);
+    if (HII->isValidAutoIncImm(LoadedVT, Val))
+      Opcode = Aligned ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32Ub_pi;
     else
-      Opcode = Hexagon::V6_vL32b_ai;
+      Opcode = Aligned ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32Ub_ai;
   // 128B
   } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 ||
              LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) {
-    HasVecOffset = true;
-    if (HII->isValidAutoIncImm(LoadedVT, Val)) {
-      Opcode = Hexagon::V6_vL32b_pi_128B;
+    if (HST->useHVXOps()) {
+      bool Aligned = isAlignedMemNode(LD);
+      HasVecOffset = true;
+      if (HII->isValidAutoIncImm(LoadedVT, Val))
+        Opcode = Aligned ? Hexagon::V6_vL32b_pi_128B
+                         : Hexagon::V6_vL32Ub_pi_128B;
+      else
+        Opcode = Aligned ? Hexagon::V6_vL32b_ai_128B
+                         : Hexagon::V6_vL32Ub_ai_128B;
     }
-    else
-      Opcode = Hexagon::V6_vL32b_ai_128B;
   } else llvm_unreachable("unknown memory type");

@@ -687,13 +692,19 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
     else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
     else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
              StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) {
-      Opcode = Hexagon::V6_vS32b_pi;
+      if (isAlignedMemNode(ST))
+        Opcode = Hexagon::V6_vS32b_pi;
+      else
+        Opcode = Hexagon::V6_vS32Ub_pi;
     }
     // 128B
     else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
              StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) {
-      Opcode = Hexagon::V6_vS32b_pi_128B;
-    } else llvm_unreachable("unknown memory type");
+      if (HST->useHVXOps())
+        Opcode = isAlignedMemNode(ST) ? Hexagon::V6_vS32b_pi_128B
+                                      : Hexagon::V6_vS32Ub_pi_128B;
+    } else
+      llvm_unreachable("unknown memory type");

     if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
       assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
@@ -728,12 +739,20 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
     else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
     else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
     else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
-             StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8)
-      Opcode = Hexagon::V6_vS32b_ai;
+             StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) {
+      if (isAlignedMemNode(ST))
+        Opcode = Hexagon::V6_vS32b_ai;
+      else
+        Opcode = Hexagon::V6_vS32Ub_ai;
+    }
     // 128B
     else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
-             StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8)
-      Opcode = Hexagon::V6_vS32b_ai_128B;
+             StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) {
+      if (isAlignedMemNode(ST))
+        Opcode = Hexagon::V6_vS32b_ai_128B;
+      else
+        Opcode = Hexagon::V6_vS32Ub_ai_128B;
+    }
     else llvm_unreachable("unknown memory type");

     // Build regular store.
@@ -1532,3 +1551,7 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
   }
   return false;
 }
+
+bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
+  return N->getAlignment() >= N->getMemoryVT().getStoreSize();
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 75244391fdd..f6ce321d5e5 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3019,6 +3019,32 @@ bool llvm::isPositiveHalfWord(SDNode *N) {
   }
 }

+bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+      unsigned AS, unsigned Align, bool *Fast) const {
+  if (Fast)
+    *Fast = false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    return false;
+  case MVT::v64i8:
+  case MVT::v128i8:
+  case MVT::v256i8:
+  case MVT::v32i16:
+  case MVT::v64i16:
+  case MVT::v128i16:
+  case MVT::v16i32:
+  case MVT::v32i32:
+  case MVT::v64i32:
+  case MVT::v8i64:
+  case MVT::v16i64:
+  case MVT::v32i64:
+    return true;
+  }
+  return false;
+}
+
+
 std::pair<const TargetRegisterClass*, uint8_t>
 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
       MVT VT) const {
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index e6024b8cfad..89c32a4b418 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -238,6 +238,9 @@ bool isPositiveHalfWord(SDNode *N);
     /// the immediate into a register.
     bool isLegalICmpImmediate(int64_t Imm) const override;

+    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+        unsigned Align, bool *Fast) const override;
+
     /// Returns relocation base for the given PIC jumptable.
     SDValue getPICJumpTableRelocBase(SDValue Table,
                                      SelectionDAG &DAG) const override;
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
index 897ada08153..c3f09b69ce8 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
@@ -10,6 +10,21 @@
 // This file describes the Hexagon V60 instructions in TableGen format.
 //
 //===----------------------------------------------------------------------===//
+def alignedload : PatFrag<(ops node:$addr), (load $addr), [{
+  return isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{
+  return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
+  return isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
+  return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;

 // Vector store

@@ -102,7 +117,7 @@ let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in {
 //===----------------------------------------------------------------------===//
 // Vector stores with base + immediate offset - unconditional
 //===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, accessSize = Vector64Access in
+let addrMode = BaseImmOffset, accessSize = Vector64Access, isPredicable = 1 in
 class T_vstore_ai <string mnemonic, string baseOp, Operand ImmOp,
                    RegisterClass RC, bit isNT>
   : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
@@ -133,16 +148,16 @@ let isNVStorable = 1, isNonTemporal = 1 in {
 }

 let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
-  def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vs32Ub_ai">,
+  def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vS32Ub_ai">,
                      V6_vS32Ub_ai_enc;
-  def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vs32Ub_ai">,
+  def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vS32Ub_ai">,
                           V6_vS32Ub_ai_128B_enc;
 }
 //===----------------------------------------------------------------------===//
 // Vector stores with base + immediate offset - unconditional new
 //===----------------------------------------------------------------------===//
 let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1,
-    Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in
+    isPredicable = 1, Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in
 class T_vstore_new_ai <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
   : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
                "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel {
@@ -384,13 +399,15 @@ let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
 //===----------------------------------------------------------------------===//
 // Post increment vector stores with immediate offset.
 //===----------------------------------------------------------------------===//
-let addrMode = PostInc in
+let addrMode = PostInc, isPredicable = 1 in
 class T_vstore_pi <string mnemonic, string baseOp, Operand ImmOp,
                    RegisterClass RC, bit isNT>
   : V6_STInst <(outs IntRegs:$_dst_),
                (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
                mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
-               "$src1 = $_dst_">, NewValueRel;
+               "$src1 = $_dst_">, NewValueRel {
+  let BaseOpcode = baseOp;
+}

 let accessSize = Vector64Access in
 class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0>
@@ -398,7 +415,7 @@ class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0>

 let isCodeGenOnly = 1, accessSize = Vector128Access in
 class T_vstore_pi_128B <string mnemonic, string baseOp, bit isNT = 0>
-  : T_vstore_pi <mnemonic, baseOp, s3_7Imm, VectorRegs128B, isNT>;
+  : T_vstore_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>;

 let isNVStorable = 1 in {
   def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc;
@@ -426,7 +443,7 @@ let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
 //===----------------------------------------------------------------------===//
 let addrMode = PostInc, isNVStore = 1 in
 let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
-    opNewValue = 3, isNVStore = 1 in
+    isPredicable = 1, opNewValue = 3, isNVStore = 1 in
 class T_vstore_new_pi <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
   : V6_STInst <(outs IntRegs:$_dst_),
                (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
@@ -644,6 +661,7 @@ let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
 //===----------------------------------------------------------------------===//
 // Post increment vector stores with register offset
 //===----------------------------------------------------------------------===//
+let isPredicable = 1 in
 class T_vstore_ppu <string mnemonic, bit isNT = 0>
   : V6_STInst <(outs IntRegs:$_dst_),
                (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
@@ -665,7 +683,7 @@ def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
 // Post increment .new vector stores with register offset
 //===----------------------------------------------------------------------===//
 let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
-    opNewValue = 3, isNVStore = 1 in
+    isPredicable = 1, opNewValue = 3, isNVStore = 1 in
 class T_vstore_new_ppu <bit isNT = 0>
   : V6_STInst <(outs IntRegs:$_dst_),
                (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
@@ -785,30 +803,46 @@ defm : STrivv_pats <v16i64, v32i64>;

 multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
   // Aligned stores
-  def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr),
+  def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
             (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
             Requires<[UseHVXSgl]>;
+  def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
+            (V6_vS32Ub_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
+            Requires<[UseHVXSgl]>;

   // 128B Aligned stores
-  def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+  def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
             (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
             Requires<[UseHVXDbl]>;
+  def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+            (V6_vS32Ub_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
+            Requires<[UseHVXDbl]>;

   // Fold Add R+IFF into vector store.
-  let AddedComplexity = 10 in
-  def : Pat<(store (VTSgl VectorRegs:$src1),
-                   (add IntRegs:$src2, s4_6ImmPred:$offset)),
-            (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
-                         (VTSgl VectorRegs:$src1))>,
-            Requires<[UseHVXSgl]>;
+  let AddedComplexity = 10 in {
+    def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
+                            (add IntRegs:$src2, s4_6ImmPred:$offset)),
+              (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+                           (VTSgl VectorRegs:$src1))>,
+              Requires<[UseHVXSgl]>;
+    def : Pat<(unalignedstore (VTSgl VectorRegs:$src1),
+                              (add IntRegs:$src2, s4_6ImmPred:$offset)),
+              (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset,
+                            (VTSgl VectorRegs:$src1))>,
+              Requires<[UseHVXSgl]>;

-  // Fold Add R+IFF into vector store 128B.
-  let AddedComplexity = 10 in
-  def : Pat<(store (VTDbl VectorRegs128B:$src1),
-                   (add IntRegs:$src2, s4_7ImmPred:$offset)),
-            (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
-                              (VTDbl VectorRegs128B:$src1))>,
-            Requires<[UseHVXDbl]>;
+    // Fold Add R+IFF into vector store 128B.
+    def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
+                            (add IntRegs:$src2, s4_7ImmPred:$offset)),
+              (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+                                (VTDbl VectorRegs128B:$src1))>,
+              Requires<[UseHVXDbl]>;
+    def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1),
+                              (add IntRegs:$src2, s4_7ImmPred:$offset)),
+              (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+                                 (VTDbl VectorRegs128B:$src1))>,
+              Requires<[UseHVXDbl]>;
+  }
 }

 defm : vS32b_ai_pats <v64i8, v128i8>;
@@ -843,25 +877,37 @@ defm : LDrivv_pats <v16i64, v32i64>;

 multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
   // Aligned loads
-  def : Pat < (VTSgl (load IntRegs:$addr)),
+  def : Pat < (VTSgl (alignedload IntRegs:$addr)),
               (V6_vL32b_ai IntRegs:$addr, #0) >,
             Requires<[UseHVXSgl]>;
+  def : Pat < (VTSgl (unalignedload IntRegs:$addr)),
+              (V6_vL32Ub_ai IntRegs:$addr, #0) >,
+            Requires<[UseHVXSgl]>;

   // 128B Load
-  def : Pat < (VTDbl (load IntRegs:$addr)),
+  def : Pat < (VTDbl (alignedload IntRegs:$addr)),
               (V6_vL32b_ai_128B IntRegs:$addr, #0) >,
             Requires<[UseHVXDbl]>;
+  def : Pat < (VTDbl (unalignedload IntRegs:$addr)),
+              (V6_vL32Ub_ai_128B IntRegs:$addr, #0) >,
+            Requires<[UseHVXDbl]>;

   // Fold Add R+IFF into vector load.
-  let AddedComplexity = 10 in
-  def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))),
-            (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
-            Requires<[UseHVXDbl]>;
-
-  let AddedComplexity = 10 in
-  def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))),
-            (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
-            Requires<[UseHVXSgl]>;
+  let AddedComplexity = 10 in {
+    def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
+              (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+              Requires<[UseHVXDbl]>;
+    def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
+              (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+              Requires<[UseHVXDbl]>;
+
+    def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
+              (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+              Requires<[UseHVXSgl]>;
+    def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
+              (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+              Requires<[UseHVXSgl]>;
+  }
 }

 defm : vL32b_ai_pats <v64i8, v128i8>;
diff --git a/llvm/test/CodeGen/Hexagon/select-instr-align.ll b/llvm/test/CodeGen/Hexagon/select-instr-align.ll
new file mode 100644
index 00000000000..e3b2929d52f
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/select-instr-align.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon -enable-hexagon-hvx < %s | FileCheck %s
+; CHECK-LABEL: aligned_load:
+; CHECK: = vmem({{.*}})
+; CHECK-LABEL: aligned_store:
+; CHECK: vmem({{.*}}) =
+; CHECK-LABEL: unaligned_load:
+; CHECK: = vmemu({{.*}})
+; CHECK-LABEL: unaligned_store:
+; CHECK: vmemu({{.*}}) =
+
+define <16 x i32> @aligned_load(<16 x i32>* %p, <16 x i32> %a) {
+  %v = load <16 x i32>, <16 x i32>* %p, align 64
+  ret <16 x i32> %v
+}
+
+define void @aligned_store(<16 x i32>* %p, <16 x i32> %a) {
+  store <16 x i32> %a, <16 x i32>* %p, align 64
+  ret void
+}
+
+define <16 x i32> @unaligned_load(<16 x i32>* %p, <16 x i32> %a) {
+  %v = load <16 x i32>, <16 x i32>* %p, align 32
+  ret <16 x i32> %v
+}
+
+define void @unaligned_store(<16 x i32>* %p, <16 x i32> %a) {
+  store <16 x i32> %a, <16 x i32>* %p, align 32
+  ret void
+}
+
+
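
The new test above exercises only the 64-byte single-vector registers; the cut-off it relies on is the isAlignedMemNode() helper added in HexagonISelDAGToDAG.cpp, which treats an access as aligned only when its alignment is at least the store size of the vector type. A standalone C++ sketch of that criterion with hypothetical alignment/size pairs follows; it is an illustration only, not code from the patch.

// Illustration only -- mirrors the check in isAlignedMemNode():
//   N->getAlignment() >= N->getMemoryVT().getStoreSize()
#include <cstdio>

// Hypothetical helper: Align stands in for the memory operand's alignment,
// VecBytes for the vector's store size (64 for single HVX, 128 for double).
static bool isAlignedAccess(unsigned Align, unsigned VecBytes) {
  return Align >= VecBytes;
}

int main() {
  const unsigned Cases[][2] = {{64, 64}, {32, 64}, {128, 128}, {8, 128}};
  for (const auto &C : Cases)
    std::printf("align %3u, %3u-byte vector -> %s\n", C[0], C[1],
                isAlignedAccess(C[0], C[1]) ? "vmem" : "vmemu");
  return 0;
}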