diff options
| author | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2017-04-12 14:29:45 +0000 |
|---|---|---|
| committer | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2017-04-12 14:29:45 +0000 |
| commit | 7184c44d66fdbe5876cf5c70f73cd1a76ef9628a (patch) | |
| tree | 23c2324df528ec3b3830354e36373d8daf5f0100 /llvm | |
| parent | f518c756417f70866af52869a44fd3814c083d93 (diff) | |
| download | bcm5719-llvm-7184c44d66fdbe5876cf5c70f73cd1a76ef9628a.tar.gz bcm5719-llvm-7184c44d66fdbe5876cf5c70f73cd1a76ef9628a.zip | |
[AMDGPU][MC] Corrected ds_wrxchg2* to support two offsets
Fixed bug 28227: https://bugs.llvm.org//show_bug.cgi?id=28227
Reviewers: vpykhtin
Differential Revision: https://reviews.llvm.org/D31808
llvm-svn: 300066
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 28 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/ds.s | 16 |
2 files changed, 37 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 36437f61642..65dcd27ae7a 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -143,6 +143,20 @@ class DS_1A2D_RET<string opName, let hasPostISelHook = 1; } +class DS_1A2D_Off8_RET<string opName, + RegisterClass rc = VGPR_32, + RegisterClass src = rc> +: DS_Pseudo<opName, + (outs rc:$vdst), + (ins VGPR_32:$addr, src:$data0, src:$data1, offset0:$offset0, offset1:$offset1, gds:$gds), + "$vdst, $addr, $data0, $data1$offset0$offset1$gds"> { + + let has_offset = 0; + let AsmMatchConverter = "cvtDSOffset01"; + + let hasPostISelHook = 1; +} + class DS_1A_RET<string opName, RegisterClass rc = VGPR_32> : DS_Pseudo<opName, (outs rc:$vdst), @@ -353,9 +367,9 @@ def DS_MAX_RTN_F32 : DS_1A1D_RET <"ds_max_rtn_f32">, def DS_WRXCHG_RTN_B32 : DS_1A1D_RET<"ds_wrxchg_rtn_b32">, AtomicNoRet<"", 1>; -def DS_WRXCHG2_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>, +def DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>, AtomicNoRet<"", 1>; -def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>, +def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>, AtomicNoRet<"", 1>; def DS_ADD_RTN_U64 : DS_1A1D_RET<"ds_add_rtn_u64", VReg_64>, @@ -394,11 +408,11 @@ def DS_MAX_RTN_F64 : DS_1A1D_RET<"ds_max_rtn_f64", VReg_64>, AtomicNoRet<"ds_max_f64", 1>; def DS_WRXCHG_RTN_B64 : DS_1A1D_RET<"ds_wrxchg_rtn_b64", VReg_64>, - AtomicNoRet<"ds_wrxchg_b64", 1>; -def DS_WRXCHG2_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>, - AtomicNoRet<"ds_wrxchg2_b64", 1>; -def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>, - AtomicNoRet<"ds_wrxchg2st64_b64", 1>; + AtomicNoRet<"", 1>; +def DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>, + AtomicNoRet<"", 1>; +def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>, + AtomicNoRet<"", 1>; def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init">; def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">; diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s index f234325c1d4..bfa4a2f7311 100644 --- a/llvm/test/MC/AMDGPU/ds.s +++ b/llvm/test/MC/AMDGPU/ds.s @@ -239,10 +239,18 @@ ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 // SICI: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xb8,0xd8,0x02,0x04,0x06,0x08] // VI: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0x5c,0xd8,0x02,0x04,0x06,0x08] +ds_wrxchg2_rtn_b32 v[0:1], v0, v0, v0 offset0:127 offset1:255 +// SICI: ds_wrxchg2_rtn_b32 v[0:1], v0, v0, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xb8,0xd8,0x00,0x00,0x00,0x00] +// VI: ds_wrxchg2_rtn_b32 v[0:1], v0, v0, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x5c,0xd8,0x00,0x00,0x00,0x00] + ds_wrxchg2st64_rtn_b32 v[8:9] v2, v4, v6 // SICI: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xbc,0xd8,0x02,0x04,0x06,0x08] // VI: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0x5e,0xd8,0x02,0x04,0x06,0x08] +ds_wrxchg2st64_rtn_b32 v[0:1], v0, v255, v0 offset0:127 offset1:255 +// SICI: ds_wrxchg2st64_rtn_b32 v[0:1], v0, v255, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xbc,0xd8,0x00,0xff,0x00,0x00] +// VI: ds_wrxchg2st64_rtn_b32 v[0:1], v0, v255, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x5e,0xd8,0x00,0xff,0x00,0x00] + ds_cmpst_rtn_b32 v8, v2, v4, v6 // SICI: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc0,0xd8,0x02,0x04,0x06,0x08] // VI: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0x60,0xd8,0x02,0x04,0x06,0x08] @@ -444,10 +452,18 @@ ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] // SICI: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb8,0xd9,0x02,0x04,0x06,0x08] // VI: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xdc,0xd8,0x02,0x04,0x06,0x08] +ds_wrxchg2_rtn_b64 v[0:3], v0, v[1:2], v[0:1] offset0:127 offset1:255 +// SICI: ds_wrxchg2_rtn_b64 v[0:3], v0, v[1:2], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xb8,0xd9,0x00,0x01,0x00,0x00] +// VI: ds_wrxchg2_rtn_b64 v[0:3], v0, v[1:2], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xdc,0xd8,0x00,0x01,0x00,0x00] + ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] // SICI: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xbc,0xd9,0x02,0x04,0x06,0x08] // VI: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xde,0xd8,0x02,0x04,0x06,0x08] +ds_wrxchg2st64_rtn_b64 v[0:3], v255, v[0:1], v[0:1] offset0:127 offset1:255 +// SICI: ds_wrxchg2st64_rtn_b64 v[0:3], v255, v[0:1], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xbc,0xd9,0xff,0x00,0x00,0x00] +// VI: ds_wrxchg2st64_rtn_b64 v[0:3], v255, v[0:1], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xde,0xd8,0xff,0x00,0x00,0x00] + ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] // SICI: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc0,0xd9,0x02,0x04,0x06,0x08] // VI: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xe0,0xd8,0x02,0x04,0x06,0x08] |

