summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2016-11-15 21:25:56 +0000
committerTom Stellard <thomas.stellard@amd.com>2016-11-15 21:25:56 +0000
commitd23de360dbbae33a5d6ba1519fe3156fa6f4cd85 (patch)
tree6a8851805aa7f1495d533f10fa7a4a49e652741b
parentc3dcf99441322c37eb3a7a013b92554622beb7a6 (diff)
downloadbcm5719-llvm-d23de360dbbae33a5d6ba1519fe3156fa6f4cd85.tar.gz
bcm5719-llvm-d23de360dbbae33a5d6ba1519fe3156fa6f4cd85.zip
AMDGPU/SI: Fix pattern for i16 = sign_extend i1
Reviewers: arsenm Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, llvm-commits, tony-tye Differential Revision: https://reviews.llvm.org/D26670 llvm-svn: 287035
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td6
-rw-r--r--llvm/test/CodeGen/AMDGPU/sign_extend.ll33
2 files changed, 38 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 8e86aa0796e..b87f3be2139 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -433,9 +433,13 @@ defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e32>;
defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_B16_e32>;
def : ZExt_i16_i1_Pat<zext>;
-def : ZExt_i16_i1_Pat<sext>;
def : ZExt_i16_i1_Pat<anyext>;
+def : Pat <
+ (i16 (sext i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
+>;
+
} // End Predicates = [isVI]
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/sign_extend.ll b/llvm/test/CodeGen/AMDGPU/sign_extend.ll
index a7db28e5167..05938170eba 100644
--- a/llvm/test/CodeGen/AMDGPU/sign_extend.ll
+++ b/llvm/test/CodeGen/AMDGPU/sign_extend.ll
@@ -72,6 +72,35 @@ define void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
ret void
}
+; The purpose of this test is to make sure the i16 = sign_extend i1 node
+; makes it all the way through the legalizer/optimizer, so that we can
+; check it is selected correctly. In s_sext_i1_to_i16, the sign_extend node
+; is optimized to a select very early.
+; GCN-LABEL: {{^}}s_sext_i1_to_i16_with_and:
+; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1
+; GCN-NEXT: buffer_store_short [[RESULT]]
+define void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+ %cmp0 = icmp eq i32 %a, %b
+ %cmp1 = icmp eq i32 %c, %d
+ %cmp = and i1 %cmp0, %cmp1
+ %sext = sext i1 %cmp to i16
+ store i16 %sext, i16 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_sext_i1_to_i16_with_and:
+; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1
+; GCN-NEXT: buffer_store_short [[RESULT]]
+define void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %cmp0 = icmp eq i32 %a, %tid
+ %cmp1 = icmp eq i32 %b, %c
+ %cmp = and i1 %cmp0, %cmp1
+ %sext = sext i1 %cmp to i16
+ store i16 %sext, i16 addrspace(1)* %out
+ ret void
+}
+
; GCN-LABEL: {{^}}s_sext_v4i8_to_v4i32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN-DAG: s_bfe_i32 [[EXT2:s[0-9]+]], [[VAL]], 0x80010
@@ -191,3 +220,7 @@ define void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in
store volatile i32 %elt3, i32 addrspace(1)* %out
ret void
}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #1 = { nounwind readnone }
OpenPOWER on IntegriCloud