summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td18
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h5
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td4
-rw-r--r--llvm/lib/Target/AMDGPU/DSInstructions.td4
-rw-r--r--llvm/lib/Target/AMDGPU/FLATInstructions.td4
6 files changed, 27 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 73490f6c421..6a028a90196 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -322,6 +322,13 @@ def FeatureDLInsts : SubtargetFeature<"dl-insts",
"Has deep learning instructions"
>;
+def FeatureD16PreservesUnusedBits : SubtargetFeature<
+ "d16-preserves-unused-bits",
+ "D16PreservesUnusedBits",
+ "true",
+ "D16 memory instructions preserve unused bits rather than zeroing them out"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -608,20 +615,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
[FeatureGFX9,
FeatureMadMixInsts,
- FeatureLDSBankCount32
+ FeatureLDSBankCount32,
+ FeatureD16PreservesUnusedBits
]>;
def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
- FeatureXNACK
+ FeatureXNACK,
+ FeatureD16PreservesUnusedBits
]>;
def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
[FeatureGFX9,
FeatureLDSBankCount32,
- FeatureFmaMixInsts]>;
+ FeatureFmaMixInsts,
+ FeatureD16PreservesUnusedBits]>;
def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
[FeatureGFX9,
@@ -769,6 +779,8 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<"!FeatureUnpackedD16VMem">;
+def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">,
+ AssemblerPredicate<"FeatureD16PreservesUnusedBits">;
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 40c583ba4f5..850715f4647 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -162,6 +162,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasSDWAOutModsVOPC(false),
HasDPP(false),
HasDLInsts(false),
+ D16PreservesUnusedBits(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index cce8800159c..0b070e7b4a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -166,6 +166,7 @@ protected:
bool HasSDWAOutModsVOPC;
bool HasDPP;
bool HasDLInsts;
+ bool D16PreservesUnusedBits;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
@@ -546,6 +547,10 @@ public:
return HasDLInsts;
}
+ bool d16PreservesUnusedBits() const {
+ return D16PreservesUnusedBits;
+ }
+
/// Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index dc528fbb9f3..999d5534050 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1374,7 +1374,7 @@ defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
-let OtherPredicates = [HasD16LoadStore] in {
+let OtherPredicates = [D16PreservesUnusedBits] in {
defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, i16, load_private>;
defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, i16, az_extloadi8_private>;
defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, i16, sextloadi8_private>;
@@ -1489,7 +1489,7 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OF
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
-let OtherPredicates = [HasD16LoadStore] in {
+let OtherPredicates = [D16PreservesUnusedBits] in {
// Hiding the extract high pattern in the PatFrag seems to not
// automatically increase the complexity.
let AddedComplexity = 1 in {
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index c0f6d270b88..28887ea4a49 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -655,7 +655,7 @@ defm : DSReadPat_mc <DS_READ_B128, v4i32, "load_align16_local">;
} // End AddedComplexity = 100
-let OtherPredicates = [HasD16LoadStore] in {
+let OtherPredicates = [D16PreservesUnusedBits] in {
let AddedComplexity = 100 in {
defm : DSReadPat_Hi16<DS_READ_U16_D16_HI, load_local>;
defm : DSReadPat_Hi16<DS_READ_U8_D16_HI, az_extloadi8_local>;
@@ -689,7 +689,7 @@ defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
defm : DSWritePat_mc <DS_WRITE_B32, i32, "store_local">;
-let OtherPredicates = [HasD16LoadStore] in {
+let OtherPredicates = [D16PreservesUnusedBits] in {
def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>;
def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_local_hi16>;
}
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 69386912808..e6d35445a90 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -780,7 +780,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
-let OtherPredicates = [HasD16LoadStore] in {
+let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
@@ -824,7 +824,7 @@ def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32>;
-let OtherPredicates = [HasD16LoadStore] in {
+let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
OpenPOWER on IntegriCloud