summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorDmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>2017-04-12 14:29:45 +0000
committerDmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>2017-04-12 14:29:45 +0000
commit7184c44d66fdbe5876cf5c70f73cd1a76ef9628a (patch)
tree23c2324df528ec3b3830354e36373d8daf5f0100 /llvm
parentf518c756417f70866af52869a44fd3814c083d93 (diff)
downloadbcm5719-llvm-7184c44d66fdbe5876cf5c70f73cd1a76ef9628a.tar.gz
bcm5719-llvm-7184c44d66fdbe5876cf5c70f73cd1a76ef9628a.zip
[AMDGPU][MC] Corrected ds_wrxchg2* to support two offsets
Fixed bug 28227: https://bugs.llvm.org//show_bug.cgi?id=28227 Reviewers: vpykhtin Differential Revision: https://reviews.llvm.org/D31808 llvm-svn: 300066
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/AMDGPU/DSInstructions.td28
-rw-r--r--llvm/test/MC/AMDGPU/ds.s16
2 files changed, 37 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 36437f61642..65dcd27ae7a 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -143,6 +143,20 @@ class DS_1A2D_RET<string opName,
let hasPostISelHook = 1;
}
+class DS_1A2D_Off8_RET<string opName,
+ RegisterClass rc = VGPR_32,
+ RegisterClass src = rc>
+: DS_Pseudo<opName,
+ (outs rc:$vdst),
+ (ins VGPR_32:$addr, src:$data0, src:$data1, offset0:$offset0, offset1:$offset1, gds:$gds),
+ "$vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
+
+ let has_offset = 0;
+ let AsmMatchConverter = "cvtDSOffset01";
+
+ let hasPostISelHook = 1;
+}
+
class DS_1A_RET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs rc:$vdst),
@@ -353,9 +367,9 @@ def DS_MAX_RTN_F32 : DS_1A1D_RET <"ds_max_rtn_f32">,
def DS_WRXCHG_RTN_B32 : DS_1A1D_RET<"ds_wrxchg_rtn_b32">,
AtomicNoRet<"", 1>;
-def DS_WRXCHG2_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>,
+def DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>,
AtomicNoRet<"", 1>;
-def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>,
+def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>,
AtomicNoRet<"", 1>;
def DS_ADD_RTN_U64 : DS_1A1D_RET<"ds_add_rtn_u64", VReg_64>,
@@ -394,11 +408,11 @@ def DS_MAX_RTN_F64 : DS_1A1D_RET<"ds_max_rtn_f64", VReg_64>,
AtomicNoRet<"ds_max_f64", 1>;
def DS_WRXCHG_RTN_B64 : DS_1A1D_RET<"ds_wrxchg_rtn_b64", VReg_64>,
- AtomicNoRet<"ds_wrxchg_b64", 1>;
-def DS_WRXCHG2_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>,
- AtomicNoRet<"ds_wrxchg2_b64", 1>;
-def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>,
- AtomicNoRet<"ds_wrxchg2st64_b64", 1>;
+ AtomicNoRet<"", 1>;
+def DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>,
+ AtomicNoRet<"", 1>;
+def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>,
+ AtomicNoRet<"", 1>;
def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init">;
def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">;
diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s
index f234325c1d4..bfa4a2f7311 100644
--- a/llvm/test/MC/AMDGPU/ds.s
+++ b/llvm/test/MC/AMDGPU/ds.s
@@ -239,10 +239,18 @@ ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6
// SICI: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xb8,0xd8,0x02,0x04,0x06,0x08]
// VI: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0x5c,0xd8,0x02,0x04,0x06,0x08]
+ds_wrxchg2_rtn_b32 v[0:1], v0, v0, v0 offset0:127 offset1:255
+// SICI: ds_wrxchg2_rtn_b32 v[0:1], v0, v0, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xb8,0xd8,0x00,0x00,0x00,0x00]
+// VI: ds_wrxchg2_rtn_b32 v[0:1], v0, v0, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x5c,0xd8,0x00,0x00,0x00,0x00]
+
ds_wrxchg2st64_rtn_b32 v[8:9] v2, v4, v6
// SICI: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xbc,0xd8,0x02,0x04,0x06,0x08]
// VI: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0x5e,0xd8,0x02,0x04,0x06,0x08]
+ds_wrxchg2st64_rtn_b32 v[0:1], v0, v255, v0 offset0:127 offset1:255
+// SICI: ds_wrxchg2st64_rtn_b32 v[0:1], v0, v255, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xbc,0xd8,0x00,0xff,0x00,0x00]
+// VI: ds_wrxchg2st64_rtn_b32 v[0:1], v0, v255, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x5e,0xd8,0x00,0xff,0x00,0x00]
+
ds_cmpst_rtn_b32 v8, v2, v4, v6
// SICI: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc0,0xd8,0x02,0x04,0x06,0x08]
// VI: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0x60,0xd8,0x02,0x04,0x06,0x08]
@@ -444,10 +452,18 @@ ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
// SICI: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb8,0xd9,0x02,0x04,0x06,0x08]
// VI: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xdc,0xd8,0x02,0x04,0x06,0x08]
+ds_wrxchg2_rtn_b64 v[0:3], v0, v[1:2], v[0:1] offset0:127 offset1:255
+// SICI: ds_wrxchg2_rtn_b64 v[0:3], v0, v[1:2], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xb8,0xd9,0x00,0x01,0x00,0x00]
+// VI: ds_wrxchg2_rtn_b64 v[0:3], v0, v[1:2], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xdc,0xd8,0x00,0x01,0x00,0x00]
+
ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
// SICI: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xbc,0xd9,0x02,0x04,0x06,0x08]
// VI: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xde,0xd8,0x02,0x04,0x06,0x08]
+ds_wrxchg2st64_rtn_b64 v[0:3], v255, v[0:1], v[0:1] offset0:127 offset1:255
+// SICI: ds_wrxchg2st64_rtn_b64 v[0:3], v255, v[0:1], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xbc,0xd9,0xff,0x00,0x00,0x00]
+// VI: ds_wrxchg2st64_rtn_b64 v[0:3], v255, v[0:1], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xde,0xd8,0xff,0x00,0x00,0x00]
+
ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
// SICI: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc0,0xd9,0x02,0x04,0x06,0x08]
// VI: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xe0,0xd8,0x02,0x04,0x06,0x08]
OpenPOWER on IntegriCloud