diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/FLATInstructions.td | 4 |
6 files changed, 27 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 73490f6c421..6a028a90196 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -322,6 +322,13 @@ def FeatureDLInsts : SubtargetFeature<"dl-insts", "Has deep learning instructions" >; +def FeatureD16PreservesUnusedBits : SubtargetFeature< + "d16-preserves-unused-bits", + "D16PreservesUnusedBits", + "true", + "D16 memory instructions preserve unused bits rather than zeroing them out" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -608,20 +615,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0, def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, [FeatureGFX9, FeatureMadMixInsts, - FeatureLDSBankCount32 + FeatureLDSBankCount32, + FeatureD16PreservesUnusedBits ]>; def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, - FeatureXNACK + FeatureXNACK, + FeatureD16PreservesUnusedBits ]>; def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4, [FeatureGFX9, FeatureLDSBankCount32, - FeatureFmaMixInsts]>; + FeatureFmaMixInsts, + FeatureD16PreservesUnusedBits]>; def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6, [FeatureGFX9, @@ -769,6 +779,8 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">, def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">, AssemblerPredicate<"!FeatureUnpackedD16VMem">; +def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">, + AssemblerPredicate<"FeatureD16PreservesUnusedBits">; def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">; def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 40c583ba4f5..850715f4647 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -162,6 +162,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasSDWAOutModsVOPC(false), HasDPP(false), HasDLInsts(false), + D16PreservesUnusedBits(false), FlatAddressSpace(false), FlatInstOffsets(false), FlatGlobalInsts(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index cce8800159c..0b070e7b4a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -166,6 +166,7 @@ protected: bool HasSDWAOutModsVOPC; bool HasDPP; bool HasDLInsts; + bool D16PreservesUnusedBits; bool FlatAddressSpace; bool FlatInstOffsets; bool FlatGlobalInsts; @@ -546,6 +547,10 @@ public: return HasDLInsts; } + bool d16PreservesUnusedBits() const { + return D16PreservesUnusedBits; + } + /// Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index dc528fbb9f3..999d5534050 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1374,7 +1374,7 @@ defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>; defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>; -let OtherPredicates = [HasD16LoadStore] in { +let OtherPredicates = [D16PreservesUnusedBits] in { defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, i16, load_private>; defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, i16, az_extloadi8_private>; defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, i16, sextloadi8_private>; @@ -1489,7 +1489,7 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OF defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>; -let OtherPredicates = [HasD16LoadStore] in { +let OtherPredicates = [D16PreservesUnusedBits] in { // Hiding the extract high pattern in the PatFrag seems to not // automatically increase the complexity. let AddedComplexity = 1 in { diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index c0f6d270b88..28887ea4a49 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -655,7 +655,7 @@ defm : DSReadPat_mc <DS_READ_B128, v4i32, "load_align16_local">; } // End AddedComplexity = 100 -let OtherPredicates = [HasD16LoadStore] in { +let OtherPredicates = [D16PreservesUnusedBits] in { let AddedComplexity = 100 in { defm : DSReadPat_Hi16<DS_READ_U16_D16_HI, load_local>; defm : DSReadPat_Hi16<DS_READ_U8_D16_HI, az_extloadi8_local>; @@ -689,7 +689,7 @@ defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">; defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">; defm : DSWritePat_mc <DS_WRITE_B32, i32, "store_local">; -let OtherPredicates = [HasD16LoadStore] in { +let OtherPredicates = [D16PreservesUnusedBits] in { def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>; def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_local_hi16>; } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 69386912808..e6d35445a90 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -780,7 +780,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>; def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; -let OtherPredicates = [HasD16LoadStore] in { +let OtherPredicates = [D16PreservesUnusedBits] in { def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>; def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>; @@ -824,7 +824,7 @@ def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32>; def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32>; def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32>; -let OtherPredicates = [HasD16LoadStore] in { +let OtherPredicates = [D16PreservesUnusedBits] in { def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; |

