author    Scott Michel <scottm@aero.org>  2008-01-11 02:53:15 +0000
committer Scott Michel <scottm@aero.org>  2008-01-11 02:53:15 +0000
commit    8d5841ae3c537699c9c1acde5142a0dccb7b800e (patch)
tree      3988f1cd626c25dbc4e7f1fc010b5895e071df0f /llvm/test
parent    4cc275c3fb6a0398708fd61e286cf45a472d8037 (diff)
More CellSPU refinement and progress:
- Cleaned up custom load/store logic: common code is now shared (see note
  below) and address modes were cleaned up.
- More test cases: various intrinsics, structure element access
  (load/store test), updated target data strings, and indirect function
  calls.

Note: This patch contains a refactoring of the LoadSDNode and StoreSDNode
structures: they now share a common base class, LSBaseSDNode, that
provides an interface to their common functionality. There is some
hackery to access the proper operand depending on the derived class;
doing a proper job would require finding and rearranging the SDOperands
passed to StoreSDNode's constructor. The current refactoring errs on the
side of being conservative and backward compatible while still reducing
redundant code for targets where loads and stores are custom-lowered.

llvm-svn: 45851
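For readers unfamiliar with the SelectionDAG node hierarchy, the shape of
the refactoring looks roughly like the following. This is a minimal
standalone sketch, not LLVM's actual headers; the member names and the
exact operand layout (a load carrying chain and base pointer, a store
carrying chain, value, and base pointer) are simplified assumptions
based only on the note above.

    // Minimal standalone sketch of the LSBaseSDNode idea: loads and
    // stores share one base class so custom-lowering code can be
    // written once against the base. Names are assumptions, not
    // LLVM's real headers.
    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    struct SDOperand { int id; };  // stand-in for a real DAG operand

    class LSBaseSDNode {
    public:
      enum Kind { LoadKind, StoreKind };

      LSBaseSDNode(Kind k, std::vector<SDOperand> ops)
          : kind(k), operands(std::move(ops))) {}

      Kind getKind() const { return kind; }
      const SDOperand &getChain() const { return operands[0]; }

      // The "hackery" the note mentions: select the base-pointer
      // operand index by derived kind instead of rearranging the
      // SDOperands that StoreSDNode's constructor receives.
      const SDOperand &getBasePtr() const {
        return operands[kind == LoadKind ? 1 : 2];
      }

    protected:
      const SDOperand &operand(std::size_t i) const { return operands[i]; }

    private:
      Kind kind;
      std::vector<SDOperand> operands;
    };

    // Load operands: { chain, base pointer }.
    class LoadSDNode : public LSBaseSDNode {
    public:
      explicit LoadSDNode(std::vector<SDOperand> ops)
          : LSBaseSDNode(LoadKind, std::move(ops)) {}
    };

    // Store operands: { chain, stored value, base pointer }.
    class StoreSDNode : public LSBaseSDNode {
    public:
      explicit StoreSDNode(std::vector<SDOperand> ops)
          : LSBaseSDNode(StoreKind, std::move(ops)) {}
      const SDOperand &getValue() const { return operand(1); }
    };

    // Custom lowering can now be written once against the base class,
    // which is the redundancy this patch removes for CellSPU.
    int lowerAddress(const LSBaseSDNode &n) {
      return n.getBasePtr().id;  // same call works for loads and stores
    }

    int main() {
      LoadSDNode ld({{10}, {11}});
      StoreSDNode st({{20}, {21}, {22}});
      std::cout << lowerAddress(ld) << ' ' << lowerAddress(st) << '\n';  // 11 22
    }

The design trade-off matches the note: resolving the operand index per
derived kind is ugly, but it avoids touching StoreSDNode's constructor
operands and so stays backward compatible.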
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/CellSPU/and_ops.ll            |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/call_indirect.ll      |  29
-rw-r--r--  llvm/test/CodeGen/CellSPU/ctpop.ll              |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/dp_farith.ll          |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/eqv.ll                |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/extract_elt.ll        |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/fcmp.ll               |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/fdiv.ll               |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/fneg-fabs.ll          |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/immed16.ll            |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/immed32.ll            |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/immed64.ll            |   3
-rw-r--r--  llvm/test/CodeGen/CellSPU/int2fp.ll             |   3
-rw-r--r--  llvm/test/CodeGen/CellSPU/intrinsics_branch.ll  | 150
-rw-r--r--  llvm/test/CodeGen/CellSPU/intrinsics_float.ll   |  94
-rw-r--r--  llvm/test/CodeGen/CellSPU/intrinsics_logical.ll |  49
-rw-r--r--  llvm/test/CodeGen/CellSPU/nand.ll               |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/or_ops.ll             |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/rotate_ops.ll         |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/select_bits.ll        |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/shift_ops.ll          |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/sp_farith.ll          |   2
-rw-r--r--  llvm/test/CodeGen/CellSPU/struct_1.ll           | 107
23 files changed, 467 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/CellSPU/and_ops.ll b/llvm/test/CodeGen/CellSPU/and_ops.ll
index f23355ee53c..6858dbabe64 100644
--- a/llvm/test/CodeGen/CellSPU/and_ops.ll
+++ b/llvm/test/CodeGen/CellSPU/and_ops.ll
@@ -4,6 +4,8 @@
; RUN: grep andi %t1.s | count 36
; RUN: grep andhi %t1.s | count 30
; RUN: grep andbi %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; AND instruction generation:
define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
diff --git a/llvm/test/CodeGen/CellSPU/call_indirect.ll b/llvm/test/CodeGen/CellSPU/call_indirect.ll
new file mode 100644
index 00000000000..7aa8abc0040
--- /dev/null
+++ b/llvm/test/CodeGen/CellSPU/call_indirect.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep bisl %t1.s | count 6 &&
+; RUN: grep ila %t1.s | count 1 &&
+; RUN: grep rotqbyi %t1.s | count 4 &&
+; RUN: grep lqa %t1.s | count 4 &&
+; RUN: grep lqd %t1.s | count 6 &&
+; RUN: grep dispatch_tab %t1.s | count 10
+; ModuleID = 'call_indirect.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
+target triple = "spu-unknown-elf"
+
+@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16
+
+define void @dispatcher(i32 %i_arg, float %f_arg) {
+entry:
+ %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16
+ tail call void %tmp2( i32 %i_arg, float %f_arg )
+ %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4
+ tail call void %tmp2.1( i32 %i_arg, float %f_arg )
+ %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4
+ tail call void %tmp2.2( i32 %i_arg, float %f_arg )
+ %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4
+ tail call void %tmp2.3( i32 %i_arg, float %f_arg )
+ %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4
+ tail call void %tmp2.4( i32 %i_arg, float %f_arg )
+ %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4
+ tail call void %tmp2.5( i32 %i_arg, float %f_arg )
+ ret void
+}
diff --git a/llvm/test/CodeGen/CellSPU/ctpop.ll b/llvm/test/CodeGen/CellSPU/ctpop.ll
index 3e2bc64f4d8..406a20accc8 100644
--- a/llvm/test/CodeGen/CellSPU/ctpop.ll
+++ b/llvm/test/CodeGen/CellSPU/ctpop.ll
@@ -3,6 +3,8 @@
; RUN: grep andi %t1.s | count 3 &&
; RUN: grep rotmi %t1.s | count 2 &&
; RUN: grep rothmi %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
declare i32 @llvm.ctpop.i8(i8)
declare i32 @llvm.ctpop.i16(i16)
diff --git a/llvm/test/CodeGen/CellSPU/dp_farith.ll b/llvm/test/CodeGen/CellSPU/dp_farith.ll
index 58c56e14705..5cdb33ee681 100644
--- a/llvm/test/CodeGen/CellSPU/dp_farith.ll
+++ b/llvm/test/CodeGen/CellSPU/dp_farith.ll
@@ -7,6 +7,8 @@
; RUN: grep dfnms %t1.s | count 4
;
; This file includes double precision floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define double @fadd(double %arg1, double %arg2) {
%A = add double %arg1, %arg2
diff --git a/llvm/test/CodeGen/CellSPU/eqv.ll b/llvm/test/CodeGen/CellSPU/eqv.ll
index a4d6dbbbd4e..0f02180b226 100644
--- a/llvm/test/CodeGen/CellSPU/eqv.ll
+++ b/llvm/test/CodeGen/CellSPU/eqv.ll
@@ -10,6 +10,8 @@
; Alternatively, a ^ ~b, which the compiler will also match.
; ModuleID = 'eqv.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
%A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/CodeGen/CellSPU/extract_elt.ll b/llvm/test/CodeGen/CellSPU/extract_elt.ll
index ab485a81fd3..f9cc32e8f29 100644
--- a/llvm/test/CodeGen/CellSPU/extract_elt.ll
+++ b/llvm/test/CodeGen/CellSPU/extract_elt.ll
@@ -5,6 +5,8 @@
; RUN: grep lqx %t2.s | count 27 &&
; RUN: grep space %t1.s | count 8 &&
; RUN: grep byte %t1.s | count 424
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i32 @i32_extract_0(<4 x i32> %v) {
entry:
diff --git a/llvm/test/CodeGen/CellSPU/fcmp.ll b/llvm/test/CodeGen/CellSPU/fcmp.ll
index 8ae97e6ff59..f4406d63dfb 100644
--- a/llvm/test/CodeGen/CellSPU/fcmp.ll
+++ b/llvm/test/CodeGen/CellSPU/fcmp.ll
@@ -3,6 +3,8 @@
; RUN: grep fcmeq %t1.s | count 1
;
; This file includes standard floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
declare double @fabs(double)
declare float @fabsf(float)
diff --git a/llvm/test/CodeGen/CellSPU/fdiv.ll b/llvm/test/CodeGen/CellSPU/fdiv.ll
index d55b12b9f51..a107bbe1f73 100644
--- a/llvm/test/CodeGen/CellSPU/fdiv.ll
+++ b/llvm/test/CodeGen/CellSPU/fdiv.ll
@@ -6,6 +6,8 @@
; RUN: grep fnms %t1.s | count 2
;
; This file includes standard floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define float @fdiv32(float %arg1, float %arg2) {
%A = fdiv float %arg1, %arg2
diff --git a/llvm/test/CodeGen/CellSPU/fneg-fabs.ll b/llvm/test/CodeGen/CellSPU/fneg-fabs.ll
index 1abdcf6a34d..a183483cded 100644
--- a/llvm/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -4,6 +4,8 @@
; RUN: grep xor %t1.s | count 4 &&
; RUN: grep and %t1.s | count 5 &&
; RUN: grep andbi %t1.s | count 3
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define double @fneg_dp(double %X) {
%Y = sub double -0.000000e+00, %X
diff --git a/llvm/test/CodeGen/CellSPU/immed16.ll b/llvm/test/CodeGen/CellSPU/immed16.ll
index 19cabc4e94c..603ec058e97 100644
--- a/llvm/test/CodeGen/CellSPU/immed16.ll
+++ b/llvm/test/CodeGen/CellSPU/immed16.ll
@@ -1,5 +1,7 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep "ilh" %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i16 @test_1() {
%x = alloca i16, align 16
diff --git a/llvm/test/CodeGen/CellSPU/immed32.ll b/llvm/test/CodeGen/CellSPU/immed32.ll
index 6a5a3615619..4bf5bbd517a 100644
--- a/llvm/test/CodeGen/CellSPU/immed32.ll
+++ b/llvm/test/CodeGen/CellSPU/immed32.ll
@@ -12,6 +12,8 @@
; RUN: grep 49077 %t1.s | count 1 &&
; RUN: grep 1267 %t1.s | count 2 &&
; RUN: grep 16309 %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i32 @test_1() {
ret i32 4784128 ;; ILHU via pattern (0x49000)
diff --git a/llvm/test/CodeGen/CellSPU/immed64.ll b/llvm/test/CodeGen/CellSPU/immed64.ll
index c4eec8ba81e..4d388b1d223 100644
--- a/llvm/test/CodeGen/CellSPU/immed64.ll
+++ b/llvm/test/CodeGen/CellSPU/immed64.ll
@@ -11,6 +11,9 @@
; RUN: grep 128 %t1.s | count 30 &&
; RUN: grep 224 %t1.s | count 2
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202)
; 18446744073709551591 => 0x ffffffff ffffffe7 (-25)
; 18446744073708516742 => 0x ffffffff fff03586 (-1034874)
diff --git a/llvm/test/CodeGen/CellSPU/int2fp.ll b/llvm/test/CodeGen/CellSPU/int2fp.ll
index 95a498428ec..b4cfea8a0b4 100644
--- a/llvm/test/CodeGen/CellSPU/int2fp.ll
+++ b/llvm/test/CodeGen/CellSPU/int2fp.ll
@@ -7,6 +7,9 @@
; RUN: grep andi %t1.s | count 1 &&
; RUN: grep ila %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
define float @sitofp_i32(i32 %arg1) {
%A = sitofp i32 %arg1 to float ; <float> [#uses=1]
ret float %A
diff --git a/llvm/test/CodeGen/CellSPU/intrinsics_branch.ll b/llvm/test/CodeGen/CellSPU/intrinsics_branch.ll
new file mode 100644
index 00000000000..5051cd56994
--- /dev/null
+++ b/llvm/test/CodeGen/CellSPU/intrinsics_branch.ll
@@ -0,0 +1,150 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ceq %t1.s | count 30 &&
+; RUN: grep ceqb %t1.s | count 10 &&
+; RUN: grep ceqhi %t1.s | count 5 &&
+; RUN: grep ceqi %t1.s | count 5 &&
+; RUN: grep cgt %t1.s | count 30 &&
+; RUN: grep cgtb %t1.s | count 10 &&
+; RUN: grep cgthi %t1.s | count 5 &&
+; RUN: grep cgti %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8)
+
+
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @ceqitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
diff --git a/llvm/test/CodeGen/CellSPU/intrinsics_float.ll b/llvm/test/CodeGen/CellSPU/intrinsics_float.ll
new file mode 100644
index 00000000000..f5a192a0a7b
--- /dev/null
+++ b/llvm/test/CodeGen/CellSPU/intrinsics_float.ll
@@ -0,0 +1,94 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep fa %t1.s | count 5 &&
+; RUN: grep fs %t1.s | count 5 &&
+; RUN: grep fm %t1.s | count 15 &&
+; RUN: grep fceq %t1.s | count 5 &&
+; RUN: grep fcmeq %t1.s | count 5 &&
+; RUN: grep fcgt %t1.s | count 5 &&
+; RUN: grep fcmgt %t1.s | count 5 &&
+; RUN: grep fma %t1.s | count 5 &&
+; RUN: grep fnms %t1.s | count 5 &&
+; RUN: grep fms %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>)
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/CellSPU/intrinsics_logical.ll b/llvm/test/CodeGen/CellSPU/intrinsics_logical.ll
new file mode 100644
index 00000000000..e43558c0f25
--- /dev/null
+++ b/llvm/test/CodeGen/CellSPU/intrinsics_logical.ll
@@ -0,0 +1,49 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep and %t1.s | count 20 &&
+; RUN: grep andc %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @anditest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @andhitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
diff --git a/llvm/test/CodeGen/CellSPU/nand.ll b/llvm/test/CodeGen/CellSPU/nand.ll
index 091f4b2edcc..841a3ec54d6 100644
--- a/llvm/test/CodeGen/CellSPU/nand.ll
+++ b/llvm/test/CodeGen/CellSPU/nand.ll
@@ -3,6 +3,8 @@
; RUN: grep and %t1.s | count 94
; RUN: grep xsbh %t1.s | count 2
; RUN: grep xshw %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
%A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/CodeGen/CellSPU/or_ops.ll b/llvm/test/CodeGen/CellSPU/or_ops.ll
index 6c46b413871..91e3e2145ab 100644
--- a/llvm/test/CodeGen/CellSPU/or_ops.ll
+++ b/llvm/test/CodeGen/CellSPU/or_ops.ll
@@ -4,6 +4,8 @@
; RUN: grep ori %t1.s | count 30
; RUN: grep orhi %t1.s | count 30
; RUN: grep orbi %t1.s | count 15
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; OR instruction generation:
define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
diff --git a/llvm/test/CodeGen/CellSPU/rotate_ops.ll b/llvm/test/CodeGen/CellSPU/rotate_ops.ll
index 6983c184c3c..0386838a555 100644
--- a/llvm/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/llvm/test/CodeGen/CellSPU/rotate_ops.ll
@@ -8,6 +8,8 @@
; RUN grep rothi.*,.3 %t1.s | count 1
; RUN: grep andhi %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; Vector rotates are not currently supported in gcc or llvm assembly. These are
; not tested.
diff --git a/llvm/test/CodeGen/CellSPU/select_bits.ll b/llvm/test/CodeGen/CellSPU/select_bits.ll
index 3cbb7a06dc7..b1600bf8f2b 100644
--- a/llvm/test/CodeGen/CellSPU/select_bits.ll
+++ b/llvm/test/CodeGen/CellSPU/select_bits.ll
@@ -3,6 +3,8 @@
; RUN: grep and %t1.s | count 2
; RUN: grep xsbh %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
%A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
diff --git a/llvm/test/CodeGen/CellSPU/shift_ops.ll b/llvm/test/CodeGen/CellSPU/shift_ops.ll
index 162ca16776b..4256d91fdb3 100644
--- a/llvm/test/CodeGen/CellSPU/shift_ops.ll
+++ b/llvm/test/CodeGen/CellSPU/shift_ops.ll
@@ -5,6 +5,8 @@
; RUN: grep shli %t1.s | count 51
; RUN: grep xshw %t1.s | count 5
; RUN: grep and %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; Vector shifts are not currently supported in gcc or llvm assembly. These are
; not tested.
diff --git a/llvm/test/CodeGen/CellSPU/sp_farith.ll b/llvm/test/CodeGen/CellSPU/sp_farith.ll
index c7e719982d6..473e9a3ecec 100644
--- a/llvm/test/CodeGen/CellSPU/sp_farith.ll
+++ b/llvm/test/CodeGen/CellSPU/sp_farith.ll
@@ -8,6 +8,8 @@
;
; This file includes standard floating point arithmetic instructions
; NOTE fdiv is tested separately since it is a compound operation
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define float @fp_add(float %arg1, float %arg2) {
%A = add float %arg1, %arg2 ; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/CellSPU/struct_1.ll b/llvm/test/CodeGen/CellSPU/struct_1.ll
new file mode 100644
index 00000000000..1159b55fab1
--- /dev/null
+++ b/llvm/test/CodeGen/CellSPU/struct_1.ll
@@ -0,0 +1,107 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep lqa %t1.s | count 10 &&
+; RUN: grep lqd %t1.s | count 2 &&
+; RUN: grep rotqbyi %t1.s | count 5 &&
+; RUN: grep xshw %t1.s | count 1 &&
+; RUN: grep andi %t1.s | count 4 &&
+; RUN: grep cbd %t1.s | count 3 &&
+; RUN: grep chd %t1.s | count 1 &&
+; RUN: grep cwd %t1.s | count 1 &&
+; RUN: grep shufb %t1.s | count 5 &&
+; RUN: grep stqa %t1.s | count 5
+; ModuleID = 'struct_1.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; struct hackstate {
+; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3)
+; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3)
+; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3)
+; int i1; // offset 4 (rotate left by 4 bytes to byte 0)
+; short s1; // offset 8 (rotate left by 6 bytes to byte 2)
+; int i2; // offset 12 [ignored]
+; unsigned char c4; // offset 16 [ignored]
+; unsigned char c5; // offset 17 [ignored]
+; unsigned char c6; // offset 18 [ignored]
+; unsigned char c7; // offset 19 (no rotate, in preferred slot)
+; int i3; // offset 20 [ignored]
+; int i4; // offset 24 [ignored]
+; int i5; // offset 28 [ignored]
+; int i6; // offset 32 (no rotate, in preferred slot)
+; }
+%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 }
+
+; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+@state = global %struct.hackstate zeroinitializer, align 16
+
+define i8 @get_hackstate_c1() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c2() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c3() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i1() {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret i32 %tmp2
+}
+
+define i16 @get_hackstate_s1() signext {
+entry:
+ %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret i16 %tmp2
+}
+
+define i8 @get_hackstate_c7() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i6() zeroext {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
+ ret i32 %tmp2
+}
+
+define void @set_hackstate_c1(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret void
+}
+
+define void @set_hackstate_c2(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret void
+}
+
+define void @set_hackstate_c3(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret void
+}
+
+define void @set_hackstate_i1(i32 %i) {
+entry:
+ store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret void
+}
+
+define void @set_hackstate_s1(i16 signext %s) {
+entry:
+ store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret void
+}
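The offsets annotated in the struct_1.ll comment follow one rule: the SPU
loads a full 16-byte quadword, then rotates the addressed element left
until it sits in its type's preferred slot (byte 3 for an i8, byte 2 for
an i16, byte 0 for an i32). A hypothetical helper illustrating that
arithmetic — not code from this patch — reproduces every rotate amount
listed in the comment:

    // Hypothetical illustration of the rotate amounts annotated in
    // struct_1.ll above; not part of this patch.
    #include <cassert>
    #include <cstdio>

    // Preferred-slot byte for a scalar of the given size in bytes:
    // i8 -> byte 3, i16 -> byte 2, i32 and wider -> byte 0.
    static int preferredSlot(int sizeBytes) {
      switch (sizeBytes) {
      case 1:  return 3;
      case 2:  return 2;
      default: return 0;
      }
    }

    // Bytes to rotate the quadword left so the element at `offset`
    // lands in the preferred slot; 0 means "already there".
    static int rotateBytes(int offset, int sizeBytes) {
      int local = offset % 16;  // position within its quadword
      return (local - preferredSlot(sizeBytes) + 16) % 16;
    }

    int main() {
      // Values match the comment in struct_1.ll:
      assert(rotateBytes(0, 1)  == 13);  // c1: rotate by 13 to byte 3
      assert(rotateBytes(1, 1)  == 14);  // c2
      assert(rotateBytes(2, 1)  == 15);  // c3
      assert(rotateBytes(4, 4)  == 4);   // i1: rotate by 4 to byte 0
      assert(rotateBytes(8, 2)  == 6);   // s1: rotate by 6 to byte 2
      assert(rotateBytes(19, 1) == 0);   // c7: already in preferred slot
      assert(rotateBytes(32, 4) == 0);   // i6: already in preferred slot
      std::puts("rotate amounts agree with struct_1.ll");
    }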