diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 8 |
5 files changed, 32 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 56922b05050..14ae62968c6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -165,6 +165,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::LOAD, MVT::v16f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32); + setOperationAction(ISD::LOAD, MVT::v32f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32); + setOperationAction(ISD::LOAD, MVT::i64, Promote); AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); @@ -256,6 +259,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, MVT::v16f32, Promote); AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32); + setOperationAction(ISD::STORE, MVT::v32f32, Promote); + AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32); + setOperationAction(ISD::STORE, MVT::i64, Promote); AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); @@ -355,7 +361,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom); setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3eb1b1c9106..b90a0d28e9e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -153,6 +153,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, if (Subtarget->hasMAIInsts()) { addRegisterClass(MVT::v32i32, &AMDGPU::AReg_1024RegClass); + addRegisterClass(MVT::v32f32, &AMDGPU::AReg_1024RegClass); } computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -263,8 +264,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. - for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, - MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v32i32 }) { + for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, + MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, + MVT::v32i32, MVT::v32f32 }) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch (Op) { case ISD::LOAD: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 98928f00a45..c382c816e0b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2178,14 +2178,13 @@ def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>; def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>; def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>; -// TODO: define v32f32 -def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32i32, f32, f32, v32i32]>; +def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>; def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>; def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>; -def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32i32, v4f16, v4f16, v32i32]>; +def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>; def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>; def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>; -def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32i32, v2i16, v2i16, v32i32]>; +def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>; def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>; def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>; def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index fd4b6f5e3e3..70f20bb6937 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -942,6 +942,14 @@ foreach Index = 0-31 in { def Insert_Element_v32i32_#Index : Insert_Element < i32, v32i32, Index, !cast<SubRegIndex>(sub#Index) >; + + def Extract_Element_v32f32_#Index : Extract_Element < + f32, v32f32, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Insert_Element_v32f32_#Index : Insert_Element < + f32, v32f32, Index, !cast<SubRegIndex>(sub#Index) + >; } // FIXME: Why do only some of these type combinations for SReg and @@ -1034,6 +1042,10 @@ def : BitConvert <v8f32, v8i32, VReg_256>; def : BitConvert <v16i32, v16f32, VReg_512>; def : BitConvert <v16f32, v16i32, VReg_512>; +// 1024-bit bitcast +def : BitConvert <v32i32, v32f32, VReg_1024>; +def : BitConvert <v32f32, v32i32, VReg_1024>; + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 14d41d84cba..4767f3c30ed 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -757,11 +757,11 @@ def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 3 let isAllocatable = 0; } -def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add SGPR_1024Regs)> { +def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add SGPR_1024Regs)> { let AllocationPriority = 19; } -def SReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, +def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add SGPR_1024)> { let CopyCost = 16; let AllocationPriority = 19; @@ -812,7 +812,7 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> { let AllocationPriority = 7; } -def VReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add VGPR_1024)> { +def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add VGPR_1024)> { let Size = 1024; let CopyCost = 32; let AllocationPriority = 8; @@ -840,7 +840,7 @@ def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add AGPR_512)> { } // TODO: add v32f32 value type -def AReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add AGPR_1024)> { +def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add AGPR_1024)> { let Size = 1024; let CopyCost = 65; let AllocationPriority = 8; |

