diff options
author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2020-01-02 13:37:34 -0600 |
---|---|---|
committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2020-01-02 13:45:54 -0600 |
commit | 781b78a3610886e265f08d6de625bfe19e3dd8cf (patch) | |
tree | e4128feaccd2b6b1fea367cbbc66a840ac2ce554 | |
parent | af340ae19f4bb109c63ca41663c97c45a39f512e (diff) | |
download | bcm5719-llvm-781b78a3610886e265f08d6de625bfe19e3dd8cf.tar.gz bcm5719-llvm-781b78a3610886e265f08d6de625bfe19e3dd8cf.zip |
[PowerPC] Only legalize FNEARBYINT with unsafe fp math
Commit 0f0330a78709 legalized these nodes on PPC without taking unsafe fp math
into account, which means that inexact exceptions are raised for nearbyint.
Since this doesn't conform to the standard, make this legalization depend
on unsafe fp math.
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll | 308 |
3 files changed, 260 insertions, 81 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9fab29dded7..df2234eea28 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -782,17 +782,22 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + // The nearbyint variants are not allowed to raise the inexact exception + // so we can only code-gen them with unsafe math. + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + } + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); setOperationAction(ISD::MUL, MVT::v2f64, Legal); diff --git a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll index 94625fcf551..7f18853b8b9 100644 --- a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll @@ -342,14 +342,14 @@ declare i64 @llvm.llround.i64.f32(float) define dso_local double @test_nearbyint(double %d) local_unnamed_addr { ; BE-LABEL: test_nearbyint: -; BE: # %bb.0: # %entry -; BE-NEXT: xsrdpic f1, f1 -; BE-NEXT: blr +; BE: # %bb.0: # %entry +; BE: bl nearbyint +; BE: blr ; ; CHECK-LABEL: test_nearbyint: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsrdpic f1, f1 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK: 
bl nearbyint +; CHECK: blr ; ; FAST-LABEL: test_nearbyint: ; FAST: # %bb.0: # %entry @@ -364,14 +364,14 @@ declare double @llvm.nearbyint.f64(double) define dso_local float @test_nearbyintf(float %f) local_unnamed_addr { ; BE-LABEL: test_nearbyintf: -; BE: # %bb.0: # %entry -; BE-NEXT: xsrdpic f1, f1 -; BE-NEXT: blr +; BE: # %bb.0: # %entry +; BE: bl nearbyint +; BE: blr ; ; CHECK-LABEL: test_nearbyintf: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsrdpic f1, f1 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK: bl nearbyintf +; CHECK: blr ; ; FAST-LABEL: test_nearbyintf: ; FAST: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index ecd19fd677c..253e74cf0bf 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -5101,20 +5101,34 @@ entry: define <1 x float> @constrained_vector_nearbyint_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v1f32: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) ; PC64LE-NEXT: addis 3, 2, .LCPI80_0@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI80_0@toc@l(3) -; PC64LE-NEXT: xsrdpic 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 +; PC64LE-NEXT: lfs 1, .LCPI80_0@toc@l(3) +; PC64LE-NEXT: bl nearbyintf +; PC64LE-NEXT: nop +; PC64LE-NEXT: xscvdpspn 0, 1 ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v1f32: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI80_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI80_0@toc@l(3) -; PC64LE9-NEXT: xsrdpic 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: lfs 1, .LCPI80_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyintf +; PC64LE9-NEXT: nop 
+; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32( @@ -5127,19 +5141,50 @@ entry: define <2 x double> @constrained_vector_nearbyint_v2f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -64(1) ; PC64LE-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI81_0@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xvrdpic 34, 0 +; PC64LE-NEXT: lfd 1, .LCPI81_0@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI81_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI81_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: addi 1, 1, 64 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI81_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI81_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 34, 0 +; PC64LE9-NEXT: lfd 1, .LCPI81_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI81_1@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI81_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 
32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 48 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( @@ -5152,50 +5197,80 @@ entry: define <3 x float> @constrained_vector_nearbyint_v3f32() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v3f32: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI82_2@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI82_1@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI82_2@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI82_1@toc@l(4) +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -48(1) ; PC64LE-NEXT: addis 3, 2, .LCPI82_0@toc@ha -; PC64LE-NEXT: xsrdpic 0, 0 -; PC64LE-NEXT: lfs 2, .LCPI82_0@toc@l(3) +; PC64LE-NEXT: lfs 1, .LCPI82_0@toc@l(3) +; PC64LE-NEXT: bl nearbyintf +; PC64LE-NEXT: nop +; PC64LE-NEXT: addis 3, 2, .LCPI82_1@toc@ha +; PC64LE-NEXT: fmr 31, 1 +; PC64LE-NEXT: lfs 1, .LCPI82_1@toc@l(3) +; PC64LE-NEXT: bl nearbyintf +; PC64LE-NEXT: nop +; PC64LE-NEXT: addis 3, 2, .LCPI82_2@toc@ha +; PC64LE-NEXT: fmr 30, 1 +; PC64LE-NEXT: lfs 1, .LCPI82_2@toc@l(3) +; PC64LE-NEXT: bl nearbyintf +; PC64LE-NEXT: nop +; PC64LE-NEXT: xscvdpspn 0, 30 ; PC64LE-NEXT: addis 3, 2, .LCPI82_3@toc@ha -; PC64LE-NEXT: xsrdpic 1, 1 -; PC64LE-NEXT: addi 3, 3, .LCPI82_3@toc@l -; PC64LE-NEXT: xsrdpic 2, 2 -; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 +; PC64LE-NEXT: addi 3, 3, .LCPI82_3@toc@l ; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 -; PC64LE-NEXT: xscvdpspn 0, 2 +; PC64LE-NEXT: xscvdpspn 0, 31 ; PC64LE-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE-NEXT: vmrglw 2, 3, 2 +; PC64LE-NEXT: vmrglw 2, 2, 3 ; PC64LE-NEXT: lvx 3, 0, 3 ; PC64LE-NEXT: xxsldwi 36, 0, 0, 1 ; PC64LE-NEXT: vperm 2, 4, 2, 3 +; PC64LE-NEXT: addi 1, 1, 48 +; PC64LE-NEXT: ld 0, 
16(1) +; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; PC64LE-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v3f32: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI82_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI82_0@toc@l(3) +; PC64LE9-NEXT: lfs 1, .LCPI82_0@toc@l(3) +; PC64LE9-NEXT: bl nearbyintf +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI82_1@toc@ha +; PC64LE9-NEXT: fmr 31, 1 ; PC64LE9-NEXT: lfs 1, .LCPI82_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyintf +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI82_2@toc@ha -; PC64LE9-NEXT: xsrdpic 0, 0 -; PC64LE9-NEXT: lfs 2, .LCPI82_2@toc@l(3) +; PC64LE9-NEXT: fmr 30, 1 +; PC64LE9-NEXT: lfs 1, .LCPI82_2@toc@l(3) +; PC64LE9-NEXT: bl nearbyintf +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xscvdpspn 0, 30 +; PC64LE9-NEXT: xxsldwi 35, 0, 0, 1 +; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addis 3, 2, .LCPI82_3@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI82_3@toc@l -; PC64LE9-NEXT: xsrdpic 1, 1 -; PC64LE9-NEXT: xsrdpic 2, 2 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 -; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1 -; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1 ; PC64LE9-NEXT: vmrglw 2, 3, 2 ; PC64LE9-NEXT: lxvx 35, 0, 3 +; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1 ; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: addi 1, 1, 48 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32( @@ -5208,31 +5283,72 @@ 
entry: define <3 x double> @constrained_vector_nearby_v3f64() #0 { ; PC64LE-LABEL: constrained_vector_nearby_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI83_1@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI83_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI83_0@toc@l(3) -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xsrdpic 3, 1 -; PC64LE-NEXT: xvrdpic 2, 0 -; PC64LE-NEXT: xxswapd 1, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha +; PC64LE-NEXT: lfs 1, .LCPI83_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI83_2@toc@ha +; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 1, 0 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearby_v3f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI83_0@toc@ha -; PC64LE9-NEXT: lfd 0, .LCPI83_0@toc@l(3) +; PC64LE9-NEXT: lfd 1, .LCPI83_0@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded 
Spill +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI83_1@toc@l -; PC64LE9-NEXT: xsrdpic 3, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 2, 0 -; PC64LE9-NEXT: xxswapd 1, 2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfs 1, .LCPI83_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI83_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: lfd 1, .LCPI83_2@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: fmr 3, 1 +; PC64LE9-NEXT: xxswapd 1, 63 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64( @@ -5245,28 +5361,86 @@ entry: define <4 x double> @constrained_vector_nearbyint_v4f64() #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI84_1@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI84_0@toc@l -; PC64LE-NEXT: lxvd2x 0, 0, 3 -; PC64LE-NEXT: addi 3, 4, .LCPI84_1@toc@l -; PC64LE-NEXT: lxvd2x 1, 0, 3 -; PC64LE-NEXT: xxswapd 0, 0 -; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xvrdpic 34, 0 -; PC64LE-NEXT: xvrdpic 35, 1 +; PC64LE-NEXT: lfd 1, .LCPI84_0@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: 
li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI84_1@toc@ha +; PC64LE-NEXT: lfd 1, .LCPI84_1@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addis 3, 2, .LCPI84_2@toc@ha +; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: lfd 1, .LCPI84_2@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI84_3@toc@ha +; PC64LE-NEXT: lfd 1, .LCPI84_3@toc@l(3) +; PC64LE-NEXT: bl nearbyint +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: vmr 2, 31 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI84_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI84_0@toc@l -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: lfd 1, .LCPI84_0@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI84_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI84_1@toc@l -; PC64LE9-NEXT: xvrdpic 34, 0 -; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvrdpic 35, 0 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_1@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; 
PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addis 3, 2, .LCPI84_2@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: lfd 1, .LCPI84_2@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addis 3, 2, .LCPI84_3@toc@ha +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: lfd 1, .LCPI84_3@toc@l(3) +; PC64LE9-NEXT: bl nearbyint +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: vmr 2, 31 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 0 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( |