| field | value | date |
|---|---|---|
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2020-01-02 19:01:55 -0500 |
| committer | Matt Arsenault <arsenm2@gmail.com> | 2020-01-09 19:46:54 -0500 |
| commit | 5cabb8357aeb3bbecaef4825c3a594f86ef94c8d (patch) | |
| tree | 70b1f00a4dc1f8300dfc2333cd0e6d13e6928e49 /llvm/test/CodeGen/AMDGPU | |
| parent | 375371cc8bff7ba02d0a2203f80de5e640fcadf1 (diff) | |
AMDGPU/GlobalISel: Fix G_EXTRACT_VECTOR_ELT mapping for s-v case
If an SGPR vector is indexed with a VGPR, the actual indexing is done on an SGPR (the index read back with readfirstlane inside the waterfall loop) and therefore produces an SGPR result. A copy to the VGPR result needs to be inserted inside the waterfall loop.
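As a rough illustration of the message above, here is a minimal C++ sketch of that kind of mapping repair, using standard GlobalISel utilities (MachineIRBuilder, MachineRegisterInfo). The helper name `remapExtractResult` and its exact surroundings are hypothetical: the commit shown on this page only updates the test, and the real fix lives in the AMDGPU register-bank mapping code.

```cpp
// Hypothetical sketch, not the code from this commit: give the
// G_EXTRACT_VECTOR_ELT an SGPR-bank result and copy it into the original
// VGPR destination while still inside the waterfall loop.
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <iterator>

using namespace llvm;

static void remapExtractResult(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               MachineInstr &MI,
                               const RegisterBank &SGPRBank) {
  Register DstReg = MI.getOperand(0).getReg(); // original vgpr(s32) def

  // Indexing the SGPR vector with the readfirstlane'd (now scalar) index
  // really produces an SGPR, so re-define the extract on a fresh
  // SGPR-bank register.
  Register SgprDst = MRI.createGenericVirtualRegister(LLT::scalar(32));
  MRI.setRegBank(SgprDst, SGPRBank);
  MI.getOperand(0).setReg(SgprDst);

  // Emit the copy to the VGPR result right after the extract, while exec
  // is still restricted to the lanes handled by this loop iteration.
  B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
  B.buildCopy(DstReg, SgprDst);
}
```

Placing the copy inside the loop is the point of the fix: each waterfall iteration runs with exec restricted to the lanes whose index equals the readfirstlane result, so only a copy emitted inside the loop writes the correct per-lane value. That copy is what shows up as V_MOV_B32_e32 in the updated checks below.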
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| file | mode | lines changed |
|---|---|---|
| llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir | -rw-r--r-- | 26 |

1 file changed, 16 insertions, 10 deletions
```diff
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
index 2733146187f..0efdf5dfc2f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
@@ -55,7 +55,8 @@ body: |
     ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
     ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
     ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
-    ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+    ; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
     ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
     ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
     ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -63,7 +64,7 @@ body: |
     ; WAVE64: successors: %bb.3(0x80000000)
     ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
     ; WAVE64: .3:
-    ; WAVE64: $vgpr0 = COPY [[EVEC]](s32)
+    ; WAVE64: $vgpr0 = COPY [[V_MOV_B32_e32_]](s32)
     ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv
     ; WAVE32: successors: %bb.1(0x80000000)
     ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
@@ -78,7 +79,8 @@ body: |
     ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
     ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
     ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
-    ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+    ; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
     ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
     ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
     ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -86,7 +88,7 @@ body: |
     ; WAVE32: successors: %bb.3(0x80000000)
     ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
     ; WAVE32: .3:
-    ; WAVE32: $vgpr0 = COPY [[EVEC]](s32)
+    ; WAVE32: $vgpr0 = COPY [[V_MOV_B32_e32_]](s32)
     %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     %1:_(s32) = COPY $vgpr0
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -280,8 +282,10 @@ body: |
     ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
     ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
     ; WAVE64: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
-    ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
-    ; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+    ; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+    ; WAVE64: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
+    ; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC1]](s32), implicit $exec
     ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
     ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
     ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -289,7 +293,7 @@ body: |
     ; WAVE64: successors: %bb.3(0x80000000)
     ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
     ; WAVE64: .3:
-    ; WAVE64: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+    ; WAVE64: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[V_MOV_B32_e32_]](s32), [[V_MOV_B32_e32_1]](s32)
     ; WAVE64: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv
     ; WAVE32: successors: %bb.1(0x80000000)
@@ -315,8 +319,10 @@ body: |
     ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
     ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
     ; WAVE32: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
-    ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
-    ; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+    ; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+    ; WAVE32: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
+    ; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC1]](s32), implicit $exec
     ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
     ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
     ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -324,7 +330,7 @@ body: |
     ; WAVE32: successors: %bb.3(0x80000000)
     ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
     ; WAVE32: .3:
-    ; WAVE32: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+    ; WAVE32: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[V_MOV_B32_e32_]](s32), [[V_MOV_B32_e32_1]](s32)
     ; WAVE32: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     %1:_(s32) = COPY $vgpr0
```

