author    Matt Arsenault <Matthew.Arsenault@amd.com>   2017-07-29 01:03:53 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>   2017-07-29 01:03:53 +0000
commit    4e309b08617e6441ca1ee265f5c0ebb5d3008e03 (patch)
tree      4ca661c2bbea7c1a09b04106c881c8452eecf6f9 /llvm/lib/Target
parent    4d060b71cc6dfb3a87ee44dc5d48e901e9011487 (diff)
download  bcm5719-llvm-4e309b08617e6441ca1ee265f5c0ebb5d3008e03.tar.gz
          bcm5719-llvm-4e309b08617e6441ca1ee265f5c0ebb5d3008e03.zip
AMDGPU: Start selecting global instructions
llvm-svn: 309470
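
The core functional change in this patch is the offset range the address selector will fold: the new global (signed) path accepts a 13-bit signed immediate offset, while the existing flat path keeps its 12-bit unsigned range. Below is a minimal standalone sketch of that range check only, using simplified stand-ins for llvm::isInt<13>/llvm::isUInt<12> from MathExtras.h; the in-tree SelectFlatOffset operates on SDValue operands rather than raw integers.

#include <cstdint>
#include <cstdio>

// Simplified stand-ins for llvm::isInt<N>/llvm::isUInt<N> (MathExtras.h).
template <unsigned N> constexpr bool isIntN(int64_t V) {
  return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
}
template <unsigned N> constexpr bool isUIntN(int64_t V) {
  return V >= 0 && V < (int64_t(1) << N);
}

// Mirrors the check added to SelectFlatOffset<IsSigned>: global (signed)
// forms can fold offsets in [-4096, 4095], flat forms only [0, 4095].
template <bool IsSigned> bool canFoldFlatOffset(int64_t COffsetVal) {
  return (IsSigned && isIntN<13>(COffsetVal)) ||
         (!IsSigned && isUIntN<12>(COffsetVal));
}

int main() {
  printf("%d %d\n", canFoldFlatOffset<true>(-16),  canFoldFlatOffset<false>(-16));  // 1 0
  printf("%d %d\n", canFoldFlatOffset<true>(4095), canFoldFlatOffset<false>(4095)); // 1 1
  printf("%d %d\n", canFoldFlatOffset<true>(4096), canFoldFlatOffset<false>(4096)); // 0 0
  return 0;
}
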
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp  | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructions.td   |  1
-rw-r--r--  llvm/lib/Target/AMDGPU/FLATInstructions.td     | 93
3 files changed, 107 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 29231adb7a6..16ad23a0783 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -140,6 +140,10 @@ private:
bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
+ bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
+
+ template <bool IsSigned>
bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
@@ -1324,6 +1328,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
return true;
}
+template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
@@ -1334,8 +1339,10 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
- uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue();
- if (isUInt<12>(COffsetVal)) {
+ int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+ if ((IsSigned && isInt<13>(COffsetVal)) ||
+ (!IsSigned && isUInt<12>(COffsetVal))) {
Addr = N0;
OffsetVal = COffsetVal;
}
@@ -1352,7 +1359,14 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
- return SelectFlatOffset(Addr, VAddr, Offset, SLC);
+ return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
+}
+
+bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
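
For orientation, the shape of the C++ change above is one shared function template over IsSigned plus thin, concretely named wrappers that the TableGen ComplexPatterns in the next file can reference by name. A rough standalone sketch of that idiom follows; the names and integer operands here are illustrative stand-ins, not the in-tree SDValue-based selector API.

#include <cstdint>

// Shared implementation, parameterized on the offset encoding; the real
// SelectFlatOffset<IsSigned> applies the isInt<13>/isUInt<12> split shown
// in the hunk above.
template <bool IsSigned>
bool selectFlatOffset(int64_t Offset, int64_t &Folded) {
  bool Legal = IsSigned ? (Offset >= -4096 && Offset <= 4095)
                        : (Offset >= 0    && Offset <= 4095);
  Folded = Legal ? Offset : 0;
  return true;
}

// Thin named entry points, mirroring SelectFlatAtomic and
// SelectFlatAtomicSigned, so each ComplexPattern can name a concrete
// selector function as a plain string.
bool selectFlatAtomic(int64_t Off, int64_t &F)       { return selectFlatOffset<false>(Off, F); }
bool selectFlatAtomicSigned(int64_t Off, int64_t &F) { return selectFlatOffset<true>(Off, F); }

int main() {
  int64_t F = 0;
  return selectFlatAtomicSigned(-16, F) && F == -16 ? 0 : 1;
}
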
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 4e688ab0b10..b9b1773f027 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -247,6 +247,7 @@ class GlobalLoad <SDPatternOperator op> : GlobalMemOp <
>;
def global_load : GlobalLoad <load>;
+def global_atomic_load : GlobalLoad<atomic_load>;
// Global address space stores
class GlobalStore <SDPatternOperator op> : GlobalMemOp <
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 8913b620c4f..df9fcff509e 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -8,7 +8,10 @@
//===----------------------------------------------------------------------===//
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
-def FLATOffset : ComplexPattern<i64, 3, "SelectFlat", [], [], -10>;
+def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>;
+
+def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>;
+def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
@@ -289,7 +292,7 @@ multiclass FLAT_Global_Atomic_Pseudo<
(ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
" $vdst, $vaddr, $vdata, off$offset glc$slc",
[(set vt:$vdst,
- (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+ (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
AtomicNoRet <opName, 1> {
let has_saddr = 1;
}
@@ -614,7 +617,7 @@ def flat_truncstorei16 : flat_st <truncstorei16>;
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
+ (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
(inst $vaddr, $offset, 0, $slc)
>;
@@ -623,8 +626,18 @@ class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
(inst $vaddr, $offset, 0, $slc)
>;
+class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
+ (inst $vaddr, $offset, 0, $slc)
+>;
+
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
- (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)),
+ (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
+ (inst $vaddr, $data, $offset, 0, $slc)
+>;
+
+class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+ (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
(inst $vaddr, $data, $offset, 0, $slc)
>;
@@ -635,12 +648,25 @@ class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt
(inst $vaddr, $data, $offset, 0, $slc)
>;
+class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+ // atomic store follows atomic binop convention so the address comes
+ // first.
+ (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
+ (inst $vaddr, $data, $offset, 0, $slc)
+>;
+
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : Pat <
(vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
(inst $vaddr, $data, $offset, $slc)
>;
+class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
+ ValueType data_vt = vt> : Pat <
+ (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
+ (inst $vaddr, $data, $offset, $slc)
+>;
+
let Predicates = [isCIVI] in {
def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
@@ -701,6 +727,65 @@ let Predicates = [isVI] in {
}
+let Predicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
+
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i16>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, az_extloadi16_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
+
+
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, global_load, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, global_load, v2i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, global_load, v4i32>;
+
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, global_atomic_load, i32>;
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, global_atomic_load, i64>;
+
+def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, global_store, i16>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, global_store, i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, global_store, v2i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, global_store, v4i32>;
+
+def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, global_store_atomic, i32>;
+def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, global_store_atomic, i64>;
+
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_and_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_or_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
+
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
+
+} // End Predicates = [HasFlatGlobalInsts]
+
+
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//