summary refs log tree commit diff stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp 46
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp 30
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonPatterns.td 41
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonPseudo.td 10
4 files changed, 112 insertions, 15 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index e5f49ca77a9..0163b2e2bdc 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -241,22 +241,31 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
case MVT::v32i16:
case MVT::v16i32:
case MVT::v8i64:
- if (isAlignedMemNode(LD))
- Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
- else
+ if (isAlignedMemNode(LD)) {
+ if (LD->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
+ }
break;
// 128B
case MVT::v128i8:
case MVT::v64i16:
case MVT::v32i32:
case MVT::v16i64:
- if (isAlignedMemNode(LD))
- Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B
- : Hexagon::V6_vL32b_ai_128B;
- else
+ if (isAlignedMemNode(LD)) {
+ if (LD->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi_128B
+ : Hexagon::V6_vL32b_nt_ai_128B;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B
+ : Hexagon::V6_vL32b_ai_128B;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi_128B
: Hexagon::V6_vL32Ub_ai_128B;
+ }
break;
default:
llvm_unreachable("Unexpected memory type in indexed load");
@@ -529,22 +538,31 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
case MVT::v32i16:
case MVT::v16i32:
case MVT::v8i64:
- if (isAlignedMemNode(ST))
- Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
- else
+ if (isAlignedMemNode(ST)) {
+ if (ST->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
+ }
break;
// 128B
case MVT::v128i8:
case MVT::v64i16:
case MVT::v32i32:
case MVT::v16i64:
- if (isAlignedMemNode(ST))
- Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B
- : Hexagon::V6_vS32b_ai_128B;
- else
+ if (isAlignedMemNode(ST)) {
+ if (ST->isNonTemporal())
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi_128B
+ : Hexagon::V6_vS32b_nt_ai_128B;
+ else
+ Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B
+ : Hexagon::V6_vS32b_ai_128B;
+ } else {
Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi_128B
: Hexagon::V6_vS32Ub_ai_128B;
+ }
break;
default:
llvm_unreachable("Unexpected memory type in indexed store");
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index ff99a104739..c77c669f4ca 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -250,15 +250,19 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case Hexagon::L2_loadri_io:
case Hexagon::L2_loadrd_io:
case Hexagon::V6_vL32b_ai:
+ case Hexagon::V6_vL32b_nt_ai:
case Hexagon::V6_vL32b_ai_128B:
+ case Hexagon::V6_vL32b_nt_ai_128B:
case Hexagon::V6_vL32Ub_ai:
case Hexagon::V6_vL32Ub_ai_128B:
case Hexagon::LDriw_pred:
case Hexagon::LDriw_mod:
case Hexagon::PS_vloadrq_ai:
case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrw_nt_ai:
case Hexagon::PS_vloadrq_ai_128B:
- case Hexagon::PS_vloadrw_ai_128B: {
+ case Hexagon::PS_vloadrw_ai_128B:
+ case Hexagon::PS_vloadrw_nt_ai_128B: {
const MachineOperand OpFI = MI.getOperand(1);
if (!OpFI.isFI())
return 0;
@@ -2473,20 +2477,28 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
switch (Opcode) {
case Hexagon::PS_vstorerq_ai:
case Hexagon::PS_vstorerw_ai:
+ case Hexagon::PS_vstorerw_nt_ai:
case Hexagon::PS_vloadrq_ai:
case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrw_nt_ai:
case Hexagon::V6_vL32b_ai:
case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vL32b_nt_ai:
+ case Hexagon::V6_vS32b_nt_ai:
case Hexagon::V6_vL32Ub_ai:
case Hexagon::V6_vS32Ub_ai:
return isShiftedInt<4,6>(Offset);
case Hexagon::PS_vstorerq_ai_128B:
case Hexagon::PS_vstorerw_ai_128B:
+ case Hexagon::PS_vstorerw_nt_ai_128B:
case Hexagon::PS_vloadrq_ai_128B:
case Hexagon::PS_vloadrw_ai_128B:
+ case Hexagon::PS_vloadrw_nt_ai_128B:
case Hexagon::V6_vL32b_ai_128B:
case Hexagon::V6_vS32b_ai_128B:
+ case Hexagon::V6_vL32b_nt_ai_128B:
+ case Hexagon::V6_vS32b_nt_ai_128B:
case Hexagon::V6_vL32Ub_ai_128B:
case Hexagon::V6_vS32Ub_ai_128B:
return isShiftedInt<4,7>(Offset);
@@ -3198,11 +3210,19 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
return Hexagon::V6_vL32b_cur_pi;
case Hexagon::V6_vL32b_ai:
return Hexagon::V6_vL32b_cur_ai;
+ case Hexagon::V6_vL32b_nt_pi:
+ return Hexagon::V6_vL32b_nt_cur_pi;
+ case Hexagon::V6_vL32b_nt_ai:
+ return Hexagon::V6_vL32b_nt_cur_ai;
//128B
case Hexagon::V6_vL32b_pi_128B:
return Hexagon::V6_vL32b_cur_pi_128B;
case Hexagon::V6_vL32b_ai_128B:
return Hexagon::V6_vL32b_cur_ai_128B;
+ case Hexagon::V6_vL32b_nt_pi_128B:
+ return Hexagon::V6_vL32b_nt_cur_pi_128B;
+ case Hexagon::V6_vL32b_nt_ai_128B:
+ return Hexagon::V6_vL32b_nt_cur_ai_128B;
}
return 0;
}
@@ -3215,11 +3235,19 @@ int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const {
return Hexagon::V6_vL32b_pi;
case Hexagon::V6_vL32b_cur_ai:
return Hexagon::V6_vL32b_ai;
+ case Hexagon::V6_vL32b_nt_cur_pi:
+ return Hexagon::V6_vL32b_nt_pi;
+ case Hexagon::V6_vL32b_nt_cur_ai:
+ return Hexagon::V6_vL32b_nt_ai;
//128B
case Hexagon::V6_vL32b_cur_pi_128B:
return Hexagon::V6_vL32b_pi_128B;
case Hexagon::V6_vL32b_cur_ai_128B:
return Hexagon::V6_vL32b_ai_128B;
+ case Hexagon::V6_vL32b_nt_cur_pi_128B:
+ return Hexagon::V6_vL32b_nt_pi_128B;
+ case Hexagon::V6_vL32b_nt_cur_ai_128B:
+ return Hexagon::V6_vL32b_nt_ai_128B;
}
return 0;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 44475d2e736..ba98b899493 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -2770,6 +2770,9 @@ def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [
multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Aligned stores
+ def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32b_nt_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
(V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
Requires<[UseHVXSgl]>;
@@ -2778,6 +2781,9 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
Requires<[UseHVXSgl]>;
// 128B Aligned stores
+ def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32b_nt_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
(V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
Requires<[UseHVXDbl]>;
@@ -2787,6 +2793,11 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Fold Add R+OFF into vector store.
let AddedComplexity = 10 in {
+ def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, Iss4_6:$offset)),
+ (V6_vS32b_nt_ai IntRegs:$src2, Iss4_6:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
(add IntRegs:$src2, Iss4_6:$offset)),
(V6_vS32b_ai IntRegs:$src2, Iss4_6:$offset,
@@ -2799,6 +2810,11 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
Requires<[UseHVXSgl]>;
// Fold Add R+OFF into vector store 128B.
+ def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, Iss4_7:$offset)),
+ (V6_vS32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
(add IntRegs:$src2, Iss4_7:$offset)),
(V6_vS32b_ai_128B IntRegs:$src2, Iss4_7:$offset,
@@ -2820,6 +2836,9 @@ defm : vS32b_ai_pats <v8i64, v16i64>;
multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Aligned loads
+ def : Pat < (VTSgl (alignednontemporalload IntRegs:$addr)),
+ (V6_vL32b_nt_ai IntRegs:$addr, 0) >,
+ Requires<[UseHVXSgl]>;
def : Pat < (VTSgl (alignedload IntRegs:$addr)),
(V6_vL32b_ai IntRegs:$addr, 0) >,
Requires<[UseHVXSgl]>;
@@ -2828,6 +2847,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
Requires<[UseHVXSgl]>;
// 128B Load
+ def : Pat < (VTDbl (alignednontemporalload IntRegs:$addr)),
+ (V6_vL32b_nt_ai_128B IntRegs:$addr, 0) >,
+ Requires<[UseHVXDbl]>;
def : Pat < (VTDbl (alignedload IntRegs:$addr)),
(V6_vL32b_ai_128B IntRegs:$addr, 0) >,
Requires<[UseHVXDbl]>;
@@ -2837,6 +2859,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Fold Add R+OFF into vector load.
let AddedComplexity = 10 in {
+ def : Pat<(VTDbl (alignednontemporalload (add IntRegs:$src2, Iss4_7:$offset))),
+ (V6_vL32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
+ Requires<[UseHVXDbl]>;
def : Pat<(VTDbl (alignedload (add IntRegs:$src2, Iss4_7:$offset))),
(V6_vL32b_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
@@ -2844,6 +2869,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
(V6_vL32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
+ def : Pat<(VTSgl (alignednontemporalload (add IntRegs:$src2, Iss4_6:$offset))),
+ (V6_vL32b_nt_ai IntRegs:$src2, Iss4_6:$offset)>,
+ Requires<[UseHVXSgl]>;
def : Pat<(VTSgl (alignedload (add IntRegs:$src2, Iss4_6:$offset))),
(V6_vL32b_ai IntRegs:$src2, Iss4_6:$offset)>,
Requires<[UseHVXSgl]>;
@@ -2859,6 +2887,9 @@ defm : vL32b_ai_pats <v16i32, v32i32>;
defm : vL32b_ai_pats <v8i64, v16i64>;
multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(alignednontemporalstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (PS_vstorerw_nt_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
(PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
Requires<[UseHVXSgl]>;
@@ -2866,6 +2897,10 @@ multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
(PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
Requires<[UseHVXSgl]>;
+ def : Pat<(alignednontemporalstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (PS_vstorerw_nt_ai_128B IntRegs:$addr, 0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
(PS_vstorerw_ai_128B IntRegs:$addr, 0,
(VTDbl VecDblRegs128B:$src1))>,
@@ -2882,6 +2917,9 @@ defm : STrivv_pats <v32i32, v64i32>;
defm : STrivv_pats <v16i64, v32i64>;
multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(VTSgl (alignednontemporalload I32:$addr)),
+ (PS_vloadrw_nt_ai I32:$addr, 0)>,
+ Requires<[UseHVXSgl]>;
def : Pat<(VTSgl (alignedload I32:$addr)),
(PS_vloadrw_ai I32:$addr, 0)>,
Requires<[UseHVXSgl]>;
@@ -2889,6 +2927,9 @@ multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
(PS_vloadrwu_ai I32:$addr, 0)>,
Requires<[UseHVXSgl]>;
+ def : Pat<(VTDbl (alignednontemporalload I32:$addr)),
+ (PS_vloadrw_nt_ai_128B I32:$addr, 0)>,
+ Requires<[UseHVXDbl]>;
def : Pat<(VTDbl (alignedload I32:$addr)),
(PS_vloadrw_ai_128B I32:$addr, 0)>,
Requires<[UseHVXDbl]>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td
index 93fb688fc1c..b42c1ab975a 100644
--- a/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -407,6 +407,11 @@ def PS_vstorerw_ai: STrivv_template<VecDblRegs, V6_vS32b_ai>,
def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32b_ai_128B>,
Requires<[HasV60T,UseHVXDbl]>;
+def PS_vstorerw_nt_ai: STrivv_template<VecDblRegs, V6_vS32b_nt_ai>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerw_nt_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32b_nt_ai_128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
def PS_vstorerwu_ai: STrivv_template<VecDblRegs, V6_vS32Ub_ai>,
Requires<[HasV60T,UseHVXSgl]>;
def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32Ub_ai_128B>,
@@ -433,6 +438,11 @@ def PS_vloadrw_ai: LDrivv_template<VecDblRegs, V6_vL32b_ai>,
def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32b_ai_128B>,
Requires<[HasV60T,UseHVXDbl]>;
+def PS_vloadrw_nt_ai: LDrivv_template<VecDblRegs, V6_vL32b_nt_ai>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrw_nt_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32b_nt_ai_128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
def PS_vloadrwu_ai: LDrivv_template<VecDblRegs, V6_vL32Ub_ai>,
Requires<[HasV60T,UseHVXSgl]>;
def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32Ub_ai_128B>,
OpenPOWER on IntegriCloud