summaryrefslogtreecommitdiffstats
path: root/llvm/test/MC/Disassembler/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/MC/Disassembler/AMDGPU')
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx10-sgpr-max.txt6
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/wave32.txt164
2 files changed, 170 insertions, 0 deletions
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10-sgpr-max.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10-sgpr-max.txt
index 4e8efd1120a..70a92e91000 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10-sgpr-max.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10-sgpr-max.txt
@@ -8,3 +8,9 @@
# GFX10: s_mov_b32 s105, s104 ; encoding: [0x68,0x03,0xe9,0xbe]
0x68,0x03,0xe9,0xbe
+
+# GFX10: v_cmp_eq_f32_e64 s105, v0, s105
+0x69,0x00,0x02,0xd4,0x00,0xd3,0x00,0x00
+
+# GFX10: v_cmp_eq_f32_sdwa s105, v0, s105 src0_sel:DWORD src1_sel:DWORD
+0xf9,0xd2,0x04,0x7c,0x00,0xe9,0x06,0x86
diff --git a/llvm/test/MC/Disassembler/AMDGPU/wave32.txt b/llvm/test/MC/Disassembler/AMDGPU/wave32.txt
new file mode 100644
index 00000000000..643cd7828da
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/wave32.txt
@@ -0,0 +1,164 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1032 %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64,-wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1064 %s
+
+# GFX1032: v_cmp_lt_f32_e32 vcc_lo, s2, v4
+# GFX1064: v_cmp_lt_f32_e32 vcc, s2, v4
+0x02,0x08,0x02,0x7c
+
+# GFX1032: v_cmp_ge_i32_e64 s2, s0, v2
+# GFX1064: v_cmp_ge_i32_e64 s[2:3], s0, v2
+0x02,0x00,0x86,0xd4,0x00,0x04,0x02,0x00
+
+# GFX1032: v_cmp_ge_i32_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:DWORD
+# GFX1064: v_cmp_ge_i32_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:DWORD
+0xf9,0x04,0x0c,0x7d,0x00,0x00,0x05,0x06
+
+# GFX1032: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
+# GFX1064: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
+0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06
+
+# GFX1032: v_cmp_class_f32_e32 vcc_lo, s0, v0
+# GFX1064: v_cmp_class_f32_e32 vcc, s0, v0
+0x00,0x00,0x10,0x7d
+
+# GFX1032: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
+# GFX1064: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
+0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06
+
+# GFX1032: v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD
+# GFX1064: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
+0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06
+
+# GFX1032: v_cndmask_b32_e32 v5, 0, v2, vcc_lo
+# GFX1064: v_cndmask_b32_e32 v5, 0, v2, vcc ;
+0x80,0x04,0x0a,0x02
+
+# GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
+# GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ;
+0x02,0x07,0x02,0x02
+
+# GFX1032: v_add_co_u32_e64 v2, vcc_lo, s0, v2
+# GFX1064: v_add_co_u32_e64 v2, vcc, s0, v2
+0x02,0x6a,0x0f,0xd7,0x00,0x04,0x02,0x00
+
+# GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
+# GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ;
+0x03,0x09,0x06,0x50
+
+# GFX1032: v_sub_co_u32_e64 v2, vcc_lo, s0, v2
+# GFX1064: v_sub_co_u32_e64 v2, vcc, s0, v2
+0x02,0x6a,0x10,0xd7,0x00,0x04,0x02,0x00
+
+# GFX1032: v_subrev_co_u32_e64 v2, vcc_lo, s0, v2
+# GFX1064: v_subrev_co_u32_e64 v2, vcc, s0, v2
+0x02,0x6a,0x19,0xd7,0x00,0x04,0x02,0x00
+
+# GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
+# GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ;
+0x03,0x09,0x06,0x52
+
+# GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+# GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ;
+0x80,0x02,0x02,0x54
+
+# GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+# GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06
+
+# GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+# GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06
+
+# GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+# GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06
+
+# GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+# GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e
+
+# GFX1032: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+# GFX1064: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0x00
+
+# FIXME: Results in invalid v_subrev_u16_dpp which apparently has the same encoding but does not exist in GFX10
+
+# gfx1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+# gfx1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+# 0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00
+
+# FIXME: Results in v_mul_lo_u16_dpp
+
+# gfx1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+# gfx1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+# 0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00
+
+# FIXME: gives v_lshlrev_b16_dpp
+
+# gfx1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+# gfx1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+# 0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00
+
+# GFX1032: v_add_co_u32_e64 v0, s0, v0, v2
+# GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2
+0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00
+
+# GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
+# GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
+0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00
+
+# GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2
+# GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2
+0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00
+
+# GFX1032: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
+# GFX1064: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
+0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00
+
+# GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2
+# GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2
+0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00
+
+# GFX1032: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
+# GFX1064: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
+0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00
+
+# GFX1032: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
+# GFX1064: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3]
+0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00
+
+# GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
+# GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ;
+0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01
+
+# GFX1032: v_div_scale_f32 v2, s2, v0, v0, v2
+# GFX1064: v_div_scale_f32 v2, s[2:3], v0, v0, v2
+0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04
+
+# GFX1032: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
+# GFX1064: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3]
+0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04
+
+# GFX1032: v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3]
+# GFX1064: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3]
+0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04
+
+# GFX1032: v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3]
+# GFX1064: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
+0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04
+
+# GFX1032: v_cmpx_neq_f32_e32 v0, v1
+# GFX1064: v_cmpx_neq_f32_e32 v0, v1
+0x00,0x03,0x3a,0x7c
+
+# GFX1032: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
+# GFX1064: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
+0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06
+
+# GFX1032: v_cmpx_class_f32_e64 v0, 1
+# GFX1064: v_cmpx_class_f32_e64 v0, 1
+0x00,0x00,0x98,0xd4,0x00,0x03,0x01,0x00
+
+# GFX1032: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
+# GFX1064: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
+0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86
OpenPOWER on IntegriCloud