diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-06-28 17:40:20 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-06-28 17:40:20 +0000 |
| commit | 54822d143211094e843f523214ae601efe630877 (patch) | |
| tree | ac288c381c77358aaaabcd056f6e66207566f65a /llvm | |
| parent | 1416c2d0529c68364d77dbbc385649e516a07842 (diff) | |
| download | bcm5719-llvm-54822d143211094e843f523214ae601efe630877.tar.gz bcm5719-llvm-54822d143211094e843f523214ae601efe630877.zip | |
[BBVectorize] Regenerate simple tests
llvm-svn: 306571
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/test/Transforms/BBVectorize/simple-int.ll | 782 | ||||
| -rw-r--r-- | llvm/test/Transforms/BBVectorize/simple.ll | 324 | ||||
| -rw-r--r-- | llvm/test/Transforms/BBVectorize/simple3.ll | 59 | ||||
| -rw-r--r-- | llvm/test/Transforms/BBVectorize/vector-sel.ll | 22 |
4 files changed, 609 insertions, 578 deletions
diff --git a/llvm/test/Transforms/BBVectorize/simple-int.ll b/llvm/test/Transforms/BBVectorize/simple-int.ll index b7f87fe1db0..dd5e90841a7 100644 --- a/llvm/test/Transforms/BBVectorize/simple-int.ll +++ b/llvm/test/Transforms/BBVectorize/simple-int.ll @@ -1,6 +1,8 @@ -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + declare double @llvm.fma.f64(double, double, double) declare double @llvm.fmuladd.f64(double, double, double) declare double @llvm.cos.f64(double) @@ -20,473 +22,479 @@ declare i64 @llvm.cttz.i64(i64, i1) ; Basic depth-3 chain with fma define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1) - %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test1( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0 -; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1 -; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0 +; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1 +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1) + %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with fmuladd define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1) - %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test1a( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0 -; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1 -; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0 +; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1 +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1) + %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with cos define double @test2(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.cos.f64(double %X1) - %Y2 = call double @llvm.cos.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test2( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.cos.f64(double %X1) + %Y2 = call double @llvm.cos.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with powi define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) { - - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) - %Y2 = call double @llvm.powi.f64(double %X2, i32 %P) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test3( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.powi.v2f64(<2 x double> [[X1]], i32 [[P:%.*]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) + %Y2 = call double @llvm.powi.f64(double %X2, i32 %P) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with powi (different powers: should not vectorize) define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) { - - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %P2 = add i32 %P, 1 - %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) - %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R ; CHECK-LABEL: @test4( -; CHECK-NOT: <2 x double> -; CHECK: ret double %R +; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]] +; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]] +; CHECK-NEXT: [[P2:%.*]] = add i32 [[P:%.*]], 1 +; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.powi.f64(double [[X1]], i32 [[P]]) +; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.powi.f64(double [[X2]], i32 [[P2]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]] +; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]] +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %P2 = add i32 %P, 1 + %Y1 = call double @llvm.powi.f64(double %X1, i32 %P) + %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with round define double @testround(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.round.f64(double %X1) - %Y2 = call double @llvm.round.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testround -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.round.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testround( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.round.f64(double %X1) + %Y2 = call double @llvm.round.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with copysign define double @testcopysign(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.copysign.f64(double %X1, double %A1) - %Y2 = call double @llvm.copysign.f64(double %X2, double %A1) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testcopysign -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1.v.i1.2 = shufflevector <2 x double> %X1.v.i0.1, <2 x double> undef, <2 x i32> zeroinitializer -; CHECK: %Y1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %X1, <2 x double> %Y1.v.i1.2) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testcopysign( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1_V_I1_2:%.*]] = shufflevector <2 x double> [[X1_V_I0_1]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[X1]], <2 x double> [[Y1_V_I1_2]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.copysign.f64(double %X1, double %A1) + %Y2 = call double @llvm.copysign.f64(double %X2, double %A1) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with ceil define double @testceil(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.ceil.f64(double %X1) - %Y2 = call double @llvm.ceil.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testceil -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testceil( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.ceil.f64(double %X1) + %Y2 = call double @llvm.ceil.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with nearbyint define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.nearbyint.f64(double %X1) - %Y2 = call double @llvm.nearbyint.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testnearbyint -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testnearbyint( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.nearbyint.f64(double %X1) + %Y2 = call double @llvm.nearbyint.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with rint define double @testrint(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.rint.f64(double %X1) - %Y2 = call double @llvm.rint.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testrint -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testrint( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.rint.f64(double %X1) + %Y2 = call double @llvm.rint.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with trunc define double @testtrunc(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.trunc.f64(double %X1) - %Y2 = call double @llvm.trunc.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testtrunc -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testtrunc( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.trunc.f64(double %X1) + %Y2 = call double @llvm.trunc.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with floor define double @testfloor(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.floor.f64(double %X1) - %Y2 = call double @llvm.floor.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testfloor -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testfloor( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.floor.f64(double %X1) + %Y2 = call double @llvm.floor.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with fabs define double @testfabs(double %A1, double %A2, double %B1, double %B2) { - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %Y1 = call double @llvm.fabs.f64(double %X1) - %Y2 = call double @llvm.fabs.f64(double %X2) - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %R = fmul double %Z1, %Z2 - ret double %R -; CHECK: @testfabs -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %X1) -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: ret double %R - +; CHECK-LABEL: @testfabs( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = call double @llvm.fabs.f64(double %X1) + %Y2 = call double @llvm.fabs.f64(double %X2) + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain with bswap define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.bswap.i64(i64 %X1) - %Y2 = call i64 @llvm.bswap.i64(i64 %X2) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testbswap -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %X1) -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 -; CHECK: ret i64 %R - +; CHECK-LABEL: @testbswap( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.bswap.i64(i64 %X1) + %Y2 = call i64 @llvm.bswap.i64(i64 %X2) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with ctpop define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.ctpop.i64(i64 %X1) - %Y2 = call i64 @llvm.ctpop.i64(i64 %X2) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testctpop -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %X1) -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 -; CHECK: ret i64 %R - +; CHECK-LABEL: @testctpop( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[X1]]) +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.ctpop.i64(i64 %X1) + %Y2 = call i64 @llvm.ctpop.i64(i64 %X2) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with ctlz define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) - %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testctlz -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %X1, i1 true) -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 -; CHECK: ret i64 %R +; CHECK-LABEL: @testctlz( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[X1]], i1 true) +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) + %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with ctlz define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) - %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testctlzneg -; CHECK: %X1 = sub i64 %A1, %B1 -; CHECK: %X2 = sub i64 %A2, %B2 -; CHECK: %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) -; CHECK: %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false) -; CHECK: %Z1 = add i64 %Y1, %B1 -; CHECK: %Z2 = add i64 %Y2, %B2 -; CHECK: %R = mul i64 %Z1, %Z2 -; CHECK: ret i64 %R +; CHECK-LABEL: @testctlzneg( +; CHECK-NEXT: [[X1:%.*]] = sub i64 [[A1:%.*]], [[B1:%.*]] +; CHECK-NEXT: [[X2:%.*]] = sub i64 [[A2:%.*]], [[B2:%.*]] +; CHECK-NEXT: [[Y1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X1]], i1 true), !range !0 +; CHECK-NEXT: [[Y2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X2]], i1 false), !range !0 +; CHECK-NEXT: [[Z1:%.*]] = add i64 [[Y1]], [[B1]] +; CHECK-NEXT: [[Z2:%.*]] = add i64 [[Y2]], [[B2]] +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1]], [[Z2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true) + %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with cttz define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) - %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testcttz -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 -; CHECK: %Y1 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %X1, i1 true) -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 -; CHECK: ret i64 %R +; CHECK-LABEL: @testcttz( +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X1]], i1 true) +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) + %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } ; Basic depth-3 chain with cttz define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { - %X1 = sub i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 - %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) - %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false) - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 - %R = mul i64 %Z1, %Z2 - ret i64 %R - -; CHECK: @testcttzneg -; CHECK: %X1 = sub i64 %A1, %B1 -; CHECK: %X2 = sub i64 %A2, %B2 -; CHECK: %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) -; CHECK: %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false) -; CHECK: %Z1 = add i64 %Y1, %B1 -; CHECK: %Z2 = add i64 %Y2, %B2 -; CHECK: %R = mul i64 %Z1, %Z2 -; CHECK: ret i64 %R +; CHECK-LABEL: @testcttzneg( +; CHECK-NEXT: [[X1:%.*]] = sub i64 [[A1:%.*]], [[B1:%.*]] +; CHECK-NEXT: [[X2:%.*]] = sub i64 [[A2:%.*]], [[B2:%.*]] +; CHECK-NEXT: [[Y1:%.*]] = call i64 @llvm.cttz.i64(i64 [[X1]], i1 true), !range !0 +; CHECK-NEXT: [[Y2:%.*]] = call i64 @llvm.cttz.i64(i64 [[X2]], i1 false), !range !0 +; CHECK-NEXT: [[Z1:%.*]] = add i64 [[Y1]], [[B1]] +; CHECK-NEXT: [[Z2:%.*]] = add i64 [[Y2]], [[B2]] +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1]], [[Z2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true) + %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false) + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } - - ; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 ; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 ; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0 diff --git a/llvm/test/Transforms/BBVectorize/simple.ll b/llvm/test/Transforms/BBVectorize/simple.ll index 0fe33f17a64..12f97ab77ba 100644 --- a/llvm/test/Transforms/BBVectorize/simple.ll +++ b/llvm/test/Transforms/BBVectorize/simple.ll @@ -1,199 +1,209 @@ -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + ; Basic depth-3 chain define double @test1(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test1( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain (last pair permuted) define double @test2(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test2( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z1 = fadd double %Y2, %B1 - %Z2 = fadd double %Y1, %B2 -; CHECK: %Z1.v.i1.1 = insertelement <2 x double> undef, double %B2, i32 0 -; CHECK: %Z1.v.i1.2 = insertelement <2 x double> %Z1.v.i1.1, double %B1, i32 1 -; CHECK: %Z2 = fadd <2 x double> %Y1, %Z1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z2.v.r1 = extractelement <2 x double> %Z2, i32 0 -; CHECK: %Z2.v.r2 = extractelement <2 x double> %Z2, i32 1 -; CHECK: %R = fmul double %Z2.v.r2, %Z2.v.r1 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0 +; CHECK-NEXT: [[Z1_V_I1_2:%.*]] = insertelement <2 x double> [[Z1_V_I1_1]], double [[B1]], i32 1 +; CHECK-NEXT: [[Z2:%.*]] = fadd <2 x double> [[Y1]], [[Z1_V_I1_2]] +; CHECK-NEXT: [[Z2_V_R1:%.*]] = extractelement <2 x double> [[Z2]], i32 0 +; CHECK-NEXT: [[Z2_V_R2:%.*]] = extractelement <2 x double> [[Z2]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z2_V_R2]], [[Z2_V_R1]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y2, %B1 + %Z2 = fadd double %Y1, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain (last pair first splat) define double @test3(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test3( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z1 = fadd double %Y2, %B1 - %Z2 = fadd double %Y2, %B2 -; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 1> -; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1_V_I0:%.*]] = shufflevector <2 x double> [[Y1]], <2 x double> undef, <2 x i32> <i32 1, i32 1> +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Z1_V_I0]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y2, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain (last pair second splat) define double @test4(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test4( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y1, %B2 -; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> zeroinitializer -; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1_V_I0:%.*]] = shufflevector <2 x double> [[Y1]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Z1_V_I0]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y1, %B2 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) { ; CHECK-LABEL: @test5( -; CHECK: %X1.v.i1 = shufflevector <2 x float> %B1, <2 x float> %B2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK: %X1.v.i0 = shufflevector <2 x float> %A1, <2 x float> %A2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - %X1 = fsub <2 x float> %A1, %B1 - %X2 = fsub <2 x float> %A2, %B2 -; CHECK: %X1 = fsub <4 x float> %X1.v.i0, %X1.v.i1 - %Y1 = fmul <2 x float> %X1, %A1 - %Y2 = fmul <2 x float> %X2, %A2 -; CHECK: %Y1 = fmul <4 x float> %X1, %X1.v.i0 - %Z1 = fadd <2 x float> %Y1, %B1 - %Z2 = fadd <2 x float> %Y2, %B2 -; CHECK: %Z1 = fadd <4 x float> %Y1, %X1.v.i1 - %R = fmul <2 x float> %Z1, %Z2 -; CHECK: %Z1.v.r1 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 0, i32 1> -; CHECK: %Z1.v.r2 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 2, i32 3> -; CHECK: %R = fmul <2 x float> %Z1.v.r1, %Z1.v.r2 - ret <2 x float> %R -; CHECK: ret <2 x float> %R +; CHECK-NEXT: [[X1_V_I1:%.*]] = shufflevector <2 x float> [[B1:%.*]], <2 x float> [[B2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[X1_V_I0:%.*]] = shufflevector <2 x float> [[A1:%.*]], <2 x float> [[A2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[X1:%.*]] = fsub <4 x float> [[X1_V_I0]], [[X1_V_I1]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <4 x float> [[X1]], [[X1_V_I0]] +; CHECK-NEXT: [[Z1:%.*]] = fadd <4 x float> [[Y1]], [[X1_V_I1]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = shufflevector <4 x float> [[Z1]], <4 x float> undef, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[Z1_V_R2:%.*]] = shufflevector <4 x float> [[Z1]], <4 x float> undef, <2 x i32> <i32 2, i32 3> +; CHECK-NEXT: [[R:%.*]] = fmul <2 x float> [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret <2 x float> [[R]] +; + %X1 = fsub <2 x float> %A1, %B1 + %X2 = fsub <2 x float> %A2, %B2 + %Y1 = fmul <2 x float> %X1, %A1 + %Y2 = fmul <2 x float> %X2, %A2 + %Z1 = fadd <2 x float> %Y1, %B1 + %Z2 = fadd <2 x float> %Y2, %B2 + %R = fmul <2 x float> %Z1, %Z2 + ret <2 x float> %R } ; Basic chain with shuffles define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) { ; CHECK-LABEL: @test6( -; CHECK: %X1.v.i1 = shufflevector <8 x i8> %B1, <8 x i8> %B2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK: %X1.v.i0 = shufflevector <8 x i8> %A1, <8 x i8> %A2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %X1 = sub <8 x i8> %A1, %B1 - %X2 = sub <8 x i8> %A2, %B2 -; CHECK: %X1 = sub <16 x i8> %X1.v.i0, %X1.v.i1 - %Y1 = mul <8 x i8> %X1, %A1 - %Y2 = mul <8 x i8> %X2, %A2 -; CHECK: %Y1 = mul <16 x i8> %X1, %X1.v.i0 - %Z1 = add <8 x i8> %Y1, %B1 - %Z2 = add <8 x i8> %Y2, %B2 -; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1 - %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3> - %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1> -; CHECK: %Q1.v.i1 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <16 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> -; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9> - %R = mul <8 x i8> %Q1, %Q2 -; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2 - ret <8 x i8> %R -; CHECK: ret <8 x i8> %R +; CHECK-NEXT: [[X1_V_I1:%.*]] = shufflevector <8 x i8> [[B1:%.*]], <8 x i8> [[B2:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[X1_V_I0:%.*]] = shufflevector <8 x i8> [[A1:%.*]], <8 x i8> [[A2:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[X1:%.*]] = sub <16 x i8> [[X1_V_I0]], [[X1_V_I1]] +; CHECK-NEXT: [[Y1:%.*]] = mul <16 x i8> [[X1]], [[X1_V_I0]] +; CHECK-NEXT: [[Z1:%.*]] = add <16 x i8> [[Y1]], [[X1_V_I1]] +; CHECK-NEXT: [[Q1_V_I1:%.*]] = shufflevector <16 x i8> [[Z1]], <16 x i8> undef, <16 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[Q1:%.*]] = shufflevector <16 x i8> [[Z1]], <16 x i8> [[Q1_V_I1]], <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9> +; CHECK-NEXT: [[Q1_V_R1:%.*]] = shufflevector <16 x i8> [[Q1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[Q1_V_R2:%.*]] = shufflevector <16 x i8> [[Q1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[R:%.*]] = mul <8 x i8> [[Q1_V_R1]], [[Q1_V_R2]] +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %X1 = sub <8 x i8> %A1, %B1 + %X2 = sub <8 x i8> %A2, %B2 + %Y1 = mul <8 x i8> %X1, %A1 + %Y2 = mul <8 x i8> %X2, %A2 + %Z1 = add <8 x i8> %Y1, %B1 + %Z2 = add <8 x i8> %Y2, %B2 + %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3> + %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1> + %R = mul <8 x i8> %Q1, %Q2 + ret <8 x i8> %R } ; Basic depth-3 chain (flipped order) define double @test7(double %A1, double %A2, double %B1, double %B2) { ; CHECK-LABEL: @test7( -; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 - %Z2 = fadd double %Y2, %B2 - %Z1 = fadd double %Y1, %B1 -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 - %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z2 = fadd double %Y2, %B2 + %Z1 = fadd double %Y1, %B1 + %R = fmul double %Z1, %Z2 + ret double %R } ; Basic depth-3 chain (subclass data) define i64 @test8(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { ; CHECK-LABEL: @test8( -; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0 -; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1 -; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0 -; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1 - %X1 = sub nsw i64 %A1, %B1 - %X2 = sub i64 %A2, %B2 -; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2 - %Y1 = mul i64 %X1, %A1 - %Y2 = mul i64 %X2, %A2 -; CHECK: %Y1 = mul <2 x i64> %X1, %X1.v.i0.2 - %Z1 = add i64 %Y1, %B1 - %Z2 = add i64 %Y2, %B2 -; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2 - %R = mul i64 %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1 -; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2 - ret i64 %R -; CHECK: ret i64 %R +; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Y1:%.*]] = mul <2 x i64> [[X1]], [[X1_V_I0_2]] +; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]] +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1 +; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: ret i64 [[R]] +; + %X1 = sub nsw i64 %A1, %B1 + %X2 = sub i64 %A2, %B2 + %Y1 = mul i64 %X1, %A1 + %Y2 = mul i64 %X2, %A2 + %Z1 = add i64 %Y1, %B1 + %Z2 = add i64 %Y2, %B2 + %R = mul i64 %Z1, %Z2 + ret i64 %R } diff --git a/llvm/test/Transforms/BBVectorize/simple3.ll b/llvm/test/Transforms/BBVectorize/simple3.ll index 6edf7f07ac1..7dd538bdfb0 100644 --- a/llvm/test/Transforms/BBVectorize/simple3.ll +++ b/llvm/test/Transforms/BBVectorize/simple3.ll @@ -1,35 +1,38 @@ -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + ; Basic depth-3 chain define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) { ; CHECK-LABEL: @test1( -; CHECK: %X1.v.i1.11 = insertelement <3 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i1.22 = insertelement <3 x double> %X1.v.i1.11, double %B2, i32 1 -; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.22, double %B3, i32 2 -; CHECK: %X1.v.i0.13 = insertelement <3 x double> undef, double %A1, i32 0 -; CHECK: %X1.v.i0.24 = insertelement <3 x double> %X1.v.i0.13, double %A2, i32 1 -; CHECK: %X1.v.i0 = insertelement <3 x double> %X1.v.i0.24, double %A3, i32 2 - %X1 = fsub double %A1, %B1 - %X2 = fsub double %A2, %B2 - %X3 = fsub double %A3, %B3 -; CHECK: %X1 = fsub <3 x double> %X1.v.i0, %X1.v.i1 - %Y1 = fmul double %X1, %A1 - %Y2 = fmul double %X2, %A2 - %Y3 = fmul double %X3, %A3 -; CHECK: %Y1 = fmul <3 x double> %X1, %X1.v.i0 - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 - %Z3 = fadd double %Y3, %B3 -; CHECK: %Z1 = fadd <3 x double> %Y1, %X1.v.i1 - %R1 = fmul double %Z1, %Z2 - %R = fmul double %R1, %Z3 -; CHECK: %Z1.v.r210 = extractelement <3 x double> %Z1, i32 2 -; CHECK: %Z1.v.r1 = extractelement <3 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <3 x double> %Z1, i32 1 -; CHECK: %R1 = fmul double %Z1.v.r1, %Z1.v.r2 -; CHECK: %R = fmul double %R1, %Z1.v.r210 - ret double %R -; CHECK: ret double %R +; CHECK-NEXT: [[X1_V_I1_11:%.*]] = insertelement <3 x double> undef, double [[B1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I1_22:%.*]] = insertelement <3 x double> [[X1_V_I1_11]], double [[B2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I1:%.*]] = insertelement <3 x double> [[X1_V_I1_22]], double [[B3:%.*]], i32 2 +; CHECK-NEXT: [[X1_V_I0_13:%.*]] = insertelement <3 x double> undef, double [[A1:%.*]], i32 0 +; CHECK-NEXT: [[X1_V_I0_24:%.*]] = insertelement <3 x double> [[X1_V_I0_13]], double [[A2:%.*]], i32 1 +; CHECK-NEXT: [[X1_V_I0:%.*]] = insertelement <3 x double> [[X1_V_I0_24]], double [[A3:%.*]], i32 2 +; CHECK-NEXT: [[X1:%.*]] = fsub <3 x double> [[X1_V_I0]], [[X1_V_I1]] +; CHECK-NEXT: [[Y1:%.*]] = fmul <3 x double> [[X1]], [[X1_V_I0]] +; CHECK-NEXT: [[Z1:%.*]] = fadd <3 x double> [[Y1]], [[X1_V_I1]] +; CHECK-NEXT: [[Z1_V_R210:%.*]] = extractelement <3 x double> [[Z1]], i32 2 +; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <3 x double> [[Z1]], i32 0 +; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <3 x double> [[Z1]], i32 1 +; CHECK-NEXT: [[R1:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]] +; CHECK-NEXT: [[R:%.*]] = fmul double [[R1]], [[Z1_V_R210]] +; CHECK-NEXT: ret double [[R]] +; + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %X3 = fsub double %A3, %B3 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Y3 = fmul double %X3, %A3 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %Z3 = fadd double %Y3, %B3 + %R1 = fmul double %Z1, %Z2 + %R = fmul double %R1, %Z3 + ret double %R } diff --git a/llvm/test/Transforms/BBVectorize/vector-sel.ll b/llvm/test/Transforms/BBVectorize/vector-sel.ll index cb775ceae69..bc15073b5a1 100644 --- a/llvm/test/Transforms/BBVectorize/vector-sel.ll +++ b/llvm/test/Transforms/BBVectorize/vector-sel.ll @@ -1,16 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -bb-vectorize -S | FileCheck %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @d = external global [1 x [10 x [1 x i16]]], align 16 -;CHECK-LABEL: @test -;CHECK: %0 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3> -;CHECK: %1 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3> -;CHECK: %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -;CHECK: %3 = shufflevector <4 x i1> %boolvec, <4 x i1> %boolvec, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -;CHECK: %4 = select <8 x i1> %3, <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>, <8 x i16> %2 define void @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BOOL:%.*]] = icmp ne i32 undef, 0 +; CHECK-NEXT: [[BOOLVEC:%.*]] = icmp ne <4 x i32> undef, zeroinitializer +; CHECK-NEXT: br label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[BOOL]], <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3> +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[BOOL]], <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[BOOLVEC]], <4 x i1> [[BOOLVEC]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>, <8 x i16> [[TMP2]] +; CHECK-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr ([1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 0) to <8 x i16>*), align 2 +; CHECK-NEXT: ret void +; entry: %bool = icmp ne i32 undef, 0 %boolvec = icmp ne <4 x i32> undef, zeroinitializer |

