author     Matt Arsenault <Matthew.Arsenault@amd.com>  2019-10-03 17:55:27 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2019-10-03 17:55:27 +0000
commit     233ff982c787aeaebe2dc271695e663675eddf74 (patch)
tree       076a197a7184c81dc54390c34580b568ebdec348 /llvm/test/CodeGen/AMDGPU/GlobalISel
parent     56271fe180f48fb36cad0b8facbd832611793ed4 (diff)
AMDGPU/GlobalISel: Split 64-bit vector extracts during RegBankSelect
Register indexing 64-bit elements is possible on the SALU, but not the VALU. Handle splitting this into two 32-bit indexes. Extend waterfall loop handling to allow moving a range of instructions.

llvm-svn: 373638
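A minimal scalar model of the split these tests check for (a sketch under stated assumptions, not the in-tree RegBankSelect code; the function and variable names are invented for illustration): a 64-bit element extract at index i over the bitcast <16 x s32> vector becomes two 32-bit extracts at 2*i and 2*i+1, recombined into an s64. The G_SHL/G_ADD pair in the CHECK lines below computes exactly those indexes.

#include <cstdint>
#include <cstdio>

// Model an <8 x s64> value viewed as <16 x s32> after G_BITCAST.
// extract64() mirrors the lowering checked below: the index is doubled
// (G_SHL %idx, 1), the high half adds 1 (G_ADD %shl, 1), and the two
// 32-bit extracts are recombined (G_MERGE_VALUES %lo, %hi).
static uint64_t extract64(const uint32_t vec32[16], uint32_t idx) {
  uint32_t lo_idx = idx << 1;        // G_SHL %idx, 1
  uint32_t hi_idx = lo_idx + 1;      // G_ADD %shl, 1
  uint32_t lo = vec32[lo_idx];       // first G_EXTRACT_VECTOR_ELT
  uint32_t hi = vec32[hi_idx];       // second G_EXTRACT_VECTOR_ELT
  return ((uint64_t)hi << 32) | lo;  // G_MERGE_VALUES takes the low part first
}

int main() {
  uint32_t v[16];
  for (int i = 0; i < 16; ++i)
    v[i] = 0x1000u + i;
  printf("elt 3 = 0x%016llx\n", (unsigned long long)extract64(v, 3));
  return 0;
}

The even index carries the low half because G_MERGE_VALUES builds the s64 from its least-significant part first, which is why [[EVEC]] (at the shifted index) precedes [[EVEC1]] in the checks.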
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir | 120
1 file changed, 114 insertions(+), 6 deletions(-)
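The _sv and _vv cases additionally wrap the split in a waterfall loop, because the index feeding the scalar index computation must be wave-uniform. Below is a rough host-side simulation of that loop's control structure (an illustrative sketch only, assuming a 64-lane wave modeled as a uint64_t exec mask; the names are made up and this is not how the compiler emits it):

#include <cstdint>
#include <cstdio>

// Simulate the waterfall loop emitted for a divergent (per-lane) index:
// take the index of the first still-active lane (V_READFIRSTLANE_B32),
// run the uniform-index extract for every lane that shares that value
// (V_CMP_EQ_U32 + S_AND_SAVEEXEC_B64), then drop those lanes from exec
// (S_XOR_B64_term) and branch back (S_CBRANCH_EXECNZ) until exec is empty.
static void waterfall(uint64_t exec, const uint32_t idx[64]) {
  while (exec) {
    int first = __builtin_ctzll(exec);         // first active lane
    uint32_t uniform = idx[first];             // V_READFIRSTLANE_B32
    uint64_t match = 0;
    for (int lane = 0; lane < 64; ++lane)      // V_CMP_EQ_U32_e64
      if (((exec >> lane) & 1) && idx[lane] == uniform)
        match |= 1ull << lane;
    printf("iteration: index %u for lanes %016llx\n",
           uniform, (unsigned long long)match);
    // ...the split 2x32 extract from the previous sketch runs here,
    // with exec restricted to `match` (S_AND_SAVEEXEC_B64)...
    exec &= ~match;                            // S_XOR_B64_term
  }
}

int main() {
  uint32_t idx[64];
  for (int i = 0; i < 64; ++i)
    idx[i] = i % 3;                            // three distinct index values
  waterfall(~0ull, idx);                       // runs three iterations
  return 0;
}

Each iteration retires every lane that shares the first active lane's index, so the loop runs once per distinct index value rather than once per lane.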
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
index 5613b1b4c27..b390e33e4ae 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
@@ -117,6 +117,102 @@ body: |
...
---
+name: extract_vector_elt_v8s64_ss
+legalized: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
+ ; CHECK-LABEL: name: extract_vector_elt_v8s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
+ ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[COPY1]](s32)
+ ; CHECK: $sgpr0_sgpr1 = COPY [[EVEC]](s64)
+ %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ %1:_(s32) = COPY $sgpr16
+ %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
+ $sgpr0_sgpr1 = COPY %2
+...
+
+---
+name: extract_vector_elt_v8s64_vs
+legalized: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
+
+ ; CHECK-LABEL: name: extract_vector_elt_v8s64_vs
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C]](s32)
+ ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+ ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+ ; CHECK: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: extract_vector_elt_v8s64_sv
+legalized: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
+ ; CHECK-LABEL: name: extract_vector_elt_v8s64_sv
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF3:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF4:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; CHECK: .1:
+ ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF4]], %bb.0, %20, %bb.1
+ ; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %7(s32), %bb.1
+ ; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %8(s32), %bb.1
+ ; CHECK: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %3(s32), %bb.1
+ ; CHECK: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %4(s32), %bb.1
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
+ ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+ ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+ ; CHECK: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; CHECK: .2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; CHECK: .3:
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
name: extract_vector_elt_v8s64_vv
legalized: true
tracksRegLiveness: true
@@ -129,16 +225,27 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s64) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF3:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF4:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s64) = G_PHI [[DEF]](s64), %bb.0, %2(s64), %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF4]], %bb.0, %20, %bb.1
+ ; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %7(s32), %bb.1
+ ; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %8(s32), %bb.1
+ ; CHECK: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %3(s32), %bb.1
+ ; CHECK: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %4(s32), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
- ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
+ ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+ ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+ ; CHECK: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -146,7 +253,8 @@ body: |
; CHECK: successors: %bb.3(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: .3:
- ; CHECK: $vgpr0_vgpr1 = COPY [[EVEC]](s64)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $vgpr16
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1