summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/PowerPC/load-two-flts.ll
diff options
context:
space:
mode:
author Hal Finkel <hfinkel@anl.gov> 2016-03-31 02:56:05 +0000
committer Hal Finkel <hfinkel@anl.gov> 2016-03-31 02:56:05 +0000
commit 851b33a0b1e3c0377f7f8f6262e9ce6a711de235 (patch)
tree 40bb9aab0ad5ea295c04d8de1b26200c57bbdbc6 /llvm/test/CodeGen/PowerPC/load-two-flts.ll
parent 8ed5cac97c1c7876fcde080bd58ee4969347a8a7 (diff)
downloadbcm5719-llvm-851b33a0b1e3c0377f7f8f6262e9ce6a711de235.tar.gz
bcm5719-llvm-851b33a0b1e3c0377f7f8f6262e9ce6a711de235.zip
[PowerPC] Load two floats directly instead of using one 64-bit integer load
When dealing with complex<float>, and similar structures with two single-precision floating-point numbers, especially when such things are being passed around by value, we'll sometimes end up loading both float values by extracting them from one 64-bit integer load. It looks like this:

  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
  t16: i64 = srl t13, Constant:i32<32>
  t17: i32 = truncate t16
  t18: f32 = bitcast t17
  t19: i32 = truncate t13
  t20: f32 = bitcast t19

The problem, especially before the P8 where those bitcasts aren't legal (and get expanded via the stack), is that it would have been better to use two floating-point loads directly. Here we add a target-specific DAGCombine to do just that. In short, we turn:

  ld 3, 0(5)
  stw 3, -8(1)
  rldicl 3, 3, 32, 32
  stw 3, -4(1)
  lfs 3, -4(1)
  lfs 0, -8(1)

into:

  lfs 3, 4(5)
  lfs 0, 0(5)

llvm-svn: 264988
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/load-two-flts.ll')
-rw-r--r--llvm/test/CodeGen/PowerPC/load-two-flts.ll60
1 files changed, 60 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/load-two-flts.ll b/llvm/test/CodeGen/PowerPC/load-two-flts.ll
new file mode 100644
index 00000000000..270a852b1b0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/load-two-flts.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @_Z4testSt7complexIfE(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
+entry: ; Test input: both floats are carved out of one i64 load of %ref.tmp, combined with %v0/%v1, and stored.
+ %v2 = load i64, i64* %ref.tmp, align 8 ; single 64-bit integer load that supplies both float values
+ %v3 = lshr i64 %v2, 32 ; bring the upper 32 bits of the loaded word down
+ %v4 = trunc i64 %v3 to i32
+ %v5 = bitcast i32 %v4 to float ; %v5 = upper 32 bits reinterpreted as float
+ %v6 = trunc i64 %v2 to i32
+ %v7 = bitcast i32 %v6 to float ; %v7 = lower 32 bits reinterpreted as float
+ %mul_ad.i.i = fmul fast float %v5, %v1
+ %mul_bc.i.i = fmul fast float %v7, %v0
+ %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i ; %v5*%v1 + %v7*%v0
+ %mul_ac.i.i = fmul fast float %v5, %v0
+ %mul_bd.i.i = fmul fast float %v7, %v1
+ %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i ; %v5*%v0 - %v7*%v1
+ store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
+ store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
+ ret void
+
+; CHECK-LABEL: @_Z4testSt7complexIfE
+; CHECK-NOT: ld {{[0-9]+}}, 0(5)
+; CHECK-NOT: stw
+; CHECK-NOT: rldicl
+; CHECK-DAG: lfs {{[0-9]+}}, 4(5)
+; CHECK-DAG: lfs {{[0-9]+}}, 0(5)
+; CHECK: blr
+}
+
+define i64* @_Z4testSt7complexIfE_idx(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
+entry: ; Test input: same two-floats-from-one-i64-load pattern, but loading from %ref.tmp + 1 and returning that address.
+ %r = getelementptr i64, i64* %ref.tmp, i64 1 ; address of the next i64 element after %ref.tmp
+ %v2 = load i64, i64* %r, align 8 ; single 64-bit integer load that supplies both float values
+ %v3 = lshr i64 %v2, 32 ; bring the upper 32 bits of the loaded word down
+ %v4 = trunc i64 %v3 to i32
+ %v5 = bitcast i32 %v4 to float ; %v5 = upper 32 bits reinterpreted as float
+ %v6 = trunc i64 %v2 to i32
+ %v7 = bitcast i32 %v6 to float ; %v7 = lower 32 bits reinterpreted as float
+ %mul_ad.i.i = fmul fast float %v5, %v1
+ %mul_bc.i.i = fmul fast float %v7, %v0
+ %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i ; %v5*%v1 + %v7*%v0
+ %mul_ac.i.i = fmul fast float %v5, %v0
+ %mul_bd.i.i = fmul fast float %v7, %v1
+ %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i ; %v5*%v0 - %v7*%v1
+ store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
+ store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
+ ret i64* %r ; the offset address is also the return value, so it stays live after the load
+
+; CHECK-LABEL: @_Z4testSt7complexIfE_idx
+; CHECK-NOT: ld {{[0-9]+}}, 8(5)
+; CHECK-NOT: ldu {{[0-9]+}}, 8(5)
+; CHECK-NOT: stw
+; CHECK-NOT: rldicl
+; CHECK-DAG: lfsu {{[0-9]+}}, 8(5)
+; CHECK-DAG: lfs {{[0-9]+}}, 4(5)
+; CHECK: blr
+}
+
OpenPOWER on IntegriCloud