summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/NVPTX
diff options
context:
space:
mode:
authorJustin Lebar <jlebar@google.com>2017-01-18 00:09:01 +0000
committerJustin Lebar <jlebar@google.com>2017-01-18 00:09:01 +0000
commitcc938fc197ce53694667e47d183f0aae2219c6c9 (patch)
treecca7da71d94dab6f7e4d75cb1312a7cc44447dc7 /llvm/test/CodeGen/NVPTX
parent7dc3d6c3415db5f30d4acafc725a50debdac3bf7 (diff)
downloadbcm5719-llvm-cc938fc197ce53694667e47d183f0aae2219c6c9.tar.gz
bcm5719-llvm-cc938fc197ce53694667e47d183f0aae2219c6c9.zip
[NVPTX] Implement min/max in tablegen, rather than with custom DAGComine logic.
Summary: This change also lets us use max.{s,u}16. There's a vague warning in a test about this maybe being less efficient, but I could not come up with a case where the resulting SASS (sm_35 or sm_60) was different with or without max.{s,u}16. It's true that nvcc seems to emit only max.{s,u}32, but even ptxas 7.0 seems to have no problem generating efficient SASS from max.{s,u}16 (the casts up to i32 and back down to i16 seem to be implicit and nops, happening via register aliasing). In the absence of evidence, better to have fewer special cases, emit more straightforward code, etc. In particular, if a new GPU has 16-bit min/max instructions, we want to be able to use them. Reviewers: tra Subscribers: jholewinski, llvm-commits Differential Revision: https://reviews.llvm.org/D28732 llvm-svn: 292304
Diffstat (limited to 'llvm/test/CodeGen/NVPTX')
-rw-r--r--llvm/test/CodeGen/NVPTX/combine-min-max.ll134
1 files changed, 127 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/NVPTX/combine-min-max.ll b/llvm/test/CodeGen/NVPTX/combine-min-max.ll
index 64bb7a37ffd..3de86be10a5 100644
--- a/llvm/test/CodeGen/NVPTX/combine-min-max.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-min-max.ll
@@ -21,20 +21,140 @@ define i64 @ba_ne_i64(i64 %a, i64 %b) {
ret i64 %sel
}
-; PTX does have e.g. max.s16, but at least as of Kepler (sm_3x) that
-; gets compiled to SASS that converts the 16 bit parameters to 32 bit
-; before using a 32 bit instruction. That is probably not a win and
-; NVCC 7.5 does not emit 16 bit min/max either, presumably for that
-; reason.
+; *************************************
+; * All variations with i16
+
+; *** ab, unsigned, i16
define i16 @ab_ugt_i16(i16 %a, i16 %b) {
; LABEL: @ab_ugt_i16
-; CHECK-NOT: min
-; CHECK-NOT: max
+; CHECK: max.u16
%cmp = icmp ugt i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
+define i16 @ab_uge_i16(i16 %a, i16 %b) {
+; LABEL: @ab_uge_i16
+; CHECK: max.u16
+ %cmp = icmp uge i16 %a, %b
+ %sel = select i1 %cmp, i16 %a, i16 %b
+ ret i16 %sel
+}
+
+define i16 @ab_ult_i16(i16 %a, i16 %b) {
+; LABEL: @ab_ult_i16
+; CHECK: min.u16
+ %cmp = icmp ult i16 %a, %b
+ %sel = select i1 %cmp, i16 %a, i16 %b
+ ret i16 %sel
+}
+
+define i16 @ab_ule_i16(i16 %a, i16 %b) {
+; LABEL: @ab_ule_i16
+; CHECK: min.u16
+ %cmp = icmp ule i16 %a, %b
+ %sel = select i1 %cmp, i16 %a, i16 %b
+ ret i16 %sel
+}
+
+; *** ab, signed, i16
+define i16 @ab_sgt_i16(i16 %a, i16 %b) {
+; LABEL: @ab_ugt_i16
+; CHECK: max.s16
+ %cmp = icmp sgt i16 %a, %b
+ %sel = select i1 %cmp, i16 %a, i16 %b
+ ret i16 %sel
+}
+
+define i16 @ab_sge_i16(i16 %a, i16 %b) {
+; LABEL: @ab_sge_i16
+; CHECK: max.s16
+ %cmp = icmp sge i16 %a, %b
+ %sel = select i1 %cmp, i16 %a, i16 %b
+ ret i16 %sel
+}
+
+define i16 @ab_slt_i16(i16 %a, i16 %b) {
+; LABEL: @ab_slt_i16
+; CHECK: min.s16
+ %cmp = icmp slt i16 %a, %b
+ %sel = select i1 %cmp, i16 %a, i16 %b
+ ret i16 %sel
+}
+
+define i16 @ab_sle_i16(i16 %a, i16 %b) {
+; LABEL: @ab_sle_i16
+; CHECK: min.s16
+ %cmp = icmp sle i16 %a, %b
+ %sel = select i1 %cmp, i16 %a, i16 %b
+ ret i16 %sel
+}
+
+; *** ba, unsigned, i16
+define i16 @ba_ugt_i16(i16 %a, i16 %b) {
+; LABEL: @ba_ugt_i16
+; CHECK: min.u16
+ %cmp = icmp ugt i16 %a, %b
+ %sel = select i1 %cmp, i16 %b, i16 %a
+ ret i16 %sel
+}
+
+define i16 @ba_uge_i16(i16 %a, i16 %b) {
+; LABEL: @ba_uge_i16
+; CHECK: min.u16
+ %cmp = icmp uge i16 %a, %b
+ %sel = select i1 %cmp, i16 %b, i16 %a
+ ret i16 %sel
+}
+
+define i16 @ba_ult_i16(i16 %a, i16 %b) {
+; LABEL: @ba_ult_i16
+; CHECK: max.u16
+ %cmp = icmp ult i16 %a, %b
+ %sel = select i1 %cmp, i16 %b, i16 %a
+ ret i16 %sel
+}
+
+define i16 @ba_ule_i16(i16 %a, i16 %b) {
+; LABEL: @ba_ule_i16
+; CHECK: max.u16
+ %cmp = icmp ule i16 %a, %b
+ %sel = select i1 %cmp, i16 %b, i16 %a
+ ret i16 %sel
+}
+
+; *** ba, signed, i16
+define i16 @ba_sgt_i16(i16 %a, i16 %b) {
+; LBAEL: @ba_ugt_i16
+; CHECK: min.s16
+ %cmp = icmp sgt i16 %a, %b
+ %sel = select i1 %cmp, i16 %b, i16 %a
+ ret i16 %sel
+}
+
+define i16 @ba_sge_i16(i16 %a, i16 %b) {
+; LABEL: @ba_sge_i16
+; CHECK: min.s16
+ %cmp = icmp sge i16 %a, %b
+ %sel = select i1 %cmp, i16 %b, i16 %a
+ ret i16 %sel
+}
+
+define i16 @ba_slt_i16(i16 %a, i16 %b) {
+; LABEL: @ba_slt_i16
+; CHECK: max.s16
+ %cmp = icmp slt i16 %a, %b
+ %sel = select i1 %cmp, i16 %b, i16 %a
+ ret i16 %sel
+}
+
+define i16 @ba_sle_i16(i16 %a, i16 %b) {
+; LABEL: @ba_sle_i16
+; CHECK: max.s16
+ %cmp = icmp sle i16 %a, %b
+ %sel = select i1 %cmp, i16 %b, i16 %a
+ ret i16 %sel
+}
; *************************************
; * All variations with i32
OpenPOWER on IntegriCloud