summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir 56
1 files changed, 56 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
new file mode 100644
index 00000000000..99a000cbd39
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
@@ -0,0 +1,56 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
+
+# SDWA-LABEL: {{^}}add_f16_u32_preserve
+
+# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
+# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
+# RES is bound once by the mul; the UNUSED_PRESERVE add must write that same register, so it reuses RES rather than rebinding it.
+# SDWA: v_mul_f32_sdwa [[RES:v[0-9]+]], [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_3
+# SDWA: v_add_f16_sdwa [[RES]], [[FIRST]], [[SECOND]] dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_1
+
+# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[RES]]
+
+---
+name: add_f16_u32_preserve
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: sreg_64 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: vgpr_32 }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: vgpr_32 }
+ - { id: 8, class: vgpr_32 }
+ - { id: 9, class: vgpr_32 }
+ - { id: 10, class: vgpr_32 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+body: |
+ bb.0:
+ liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
+ ; Copy the live-in physical registers into virtual registers, then load one dword through each 64-bit address (%0 and %1).
+ %2 = COPY %sgpr30_sgpr31
+ %1 = COPY %vgpr2_vgpr3
+ %0 = COPY %vgpr0_vgpr1
+ %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+ %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+ ; Sub-dword operand extraction: %5 = %3 masked with 65535, %6 = %4 shifted right by 16, %7 = 8-bit field of %3 at bit 8, %8 = %4 shifted right by 24.
+ %5 = V_AND_B32_e32 65535, %3, implicit %exec
+ %6 = V_LSHRREV_B32_e64 16, %4, implicit %exec
+ %7 = V_BFE_U32 %3, 8, 8, implicit %exec
+ %8 = V_LSHRREV_B32_e32 24, %4, implicit %exec
+ ; f16 add of (%5, %6) is shifted left by 8; f32 mul of (%7, %8) is shifted left by 16.
+ %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit %exec
+ %10 = V_LSHLREV_B16_e64 8, %9, implicit %exec
+ %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit %exec
+ %12 = V_LSHLREV_B32_e64 16, %11, implicit %exec
+ ; OR merges the two shifted results; the check lines expect this merge to appear as dst_unused:UNUSED_PRESERVE on the add.
+ %13 = V_OR_B32_e64 %10, %12, implicit %exec
+ ; Store the merged dword back through %0, restore %sgpr30_sgpr31 from %2, and return through it.
+ FLAT_STORE_DWORD %0, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
+ %sgpr30_sgpr31 = COPY %2
+ S_SETPC_B64_return %sgpr30_sgpr31
OpenPOWER on IntegriCloud