summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
diff options
context:
space:
mode:
author Justin Holewinski <jholewinski@nvidia.com> 2012-05-04 20:18:50 +0000
committer Justin Holewinski <jholewinski@nvidia.com> 2012-05-04 20:18:50 +0000
commit ae556d3ef72dfe5f40a337b7071f42b7bf5b66a4 (patch)
tree 14ad103ff9863d609096fcf56552790c407f13d5 /llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
parent 2420e8b7d591acce85b253f4b234c1a8c05fa5e6 (diff)
downloadbcm5719-llvm-ae556d3ef72dfe5f40a337b7071f42b7bf5b66a4.tar.gz
bcm5719-llvm-ae556d3ef72dfe5f40a337b7071f42b7bf5b66a4.zip
This patch adds a new NVPTX back-end to LLVM which supports code generation for NVIDIA PTX 3.0. This back-end will (eventually) replace the current PTX back-end, while maintaining compatibility with it.
The new target machines are: nvptx (old ptx32) => 32-bit PTX; nvptx64 (old ptx64) => 64-bit PTX. The sources are based on the internal NVIDIA NVPTX back-end, and contain more functionality than the current PTX back-end provides. NV_CONTRIB llvm-svn: 156196
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll')
-rw-r--r--llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll72
1 files changed, 72 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll b/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
new file mode 100644
index 00000000000..e474fa4df5c
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; f64
+
+define double @fadd_f64(double %a, double %b) { ; instruction-selection test: double-precision add
+; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fadd double %a, %b ; %a + %b; the pattern above expects add.f64 with three %fl<N> register operands
+ ret double %ret ; return the sum; matched by the "ret" pattern above
+}
+
+define double @fsub_f64(double %a, double %b) { ; instruction-selection test: double-precision subtract
+; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fsub double %a, %b ; %a - %b; the pattern above expects sub.f64 with three %fl<N> register operands
+ ret double %ret ; return the difference; matched by the "ret" pattern above
+}
+
+define double @fmul_f64(double %a, double %b) { ; instruction-selection test: double-precision multiply
+; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fmul double %a, %b ; %a * %b; the pattern above expects mul.f64 with three %fl<N> register operands
+ ret double %ret ; return the product; matched by the "ret" pattern above
+}
+
+define double @fdiv_f64(double %a, double %b) { ; instruction-selection test: double-precision divide
+; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fdiv double %a, %b ; %a / %b; the pattern above expects the .rn-qualified div.f64 with three %fl<N> register operands
+ ret double %ret ; return the quotient; matched by the "ret" pattern above
+}
+
+;; PTX does not have a floating-point rem instruction
+
+
+;;; f32
+
+define float @fadd_f32(float %a, float %b) { ; instruction-selection test: single-precision add
+; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fadd float %a, %b ; %a + %b; the pattern above expects add.f32 with three %f<N> register operands
+ ret float %ret ; return the sum; matched by the "ret" pattern above
+}
+
+define float @fsub_f32(float %a, float %b) { ; instruction-selection test: single-precision subtract
+; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fsub float %a, %b ; %a - %b; the pattern above expects sub.f32 with three %f<N> register operands
+ ret float %ret ; return the difference; matched by the "ret" pattern above
+}
+
+define float @fmul_f32(float %a, float %b) { ; instruction-selection test: single-precision multiply
+; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fmul float %a, %b ; %a * %b; the pattern above expects mul.f32 with three %f<N> register operands
+ ret float %ret ; return the product; matched by the "ret" pattern above
+}
+
+define float @fdiv_f32(float %a, float %b) { ; instruction-selection test: single-precision divide
+; CHECK: div.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fdiv float %a, %b ; %a / %b; the pattern above expects the .rn-qualified div.f32 with three %f<N> register operands
+ ret float %ret ; return the quotient; matched by the "ret" pattern above
+}
+
+;; PTX does not have a floating-point rem instruction
OpenPOWER on IntegriCloud