| author | Craig Topper <craig.topper@intel.com> | 2018-02-13 04:19:26 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-02-13 04:19:26 +0000 |
| commit | df99baa4df985cfd9a1517b4ca79b41c179f4773 | |
| tree | 101c7543d69bf83420a46a2d7a371113bad0e9ef | |
| parent | 4b89cc1b96682a14c6f95cf431cb946bc3f62e0c | |
[X86] Teach EVEX->VEX pass to turn VRNDSCALE into VROUND when bits 7:4 of the immediate are 0 and the regular EVEX->VEX checks pass.
Bits 7:4 of the immediate control the scale part of the operation. If the scale is 0, the behavior is equivalent to VROUND.
Fixes PR36246
llvm-svn: 324985
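
The check behind this change is small: a VRNDSCALE immediate can be rewritten as a VROUND immediate only when the scale field in bits 7:4 is zero, so that the rounding-control bits 3:0 describe the whole operation. A minimal standalone sketch of that predicate follows (the helper name is illustrative, not part of the patch):

```cpp
#include <cstdint>

// Illustrative helper (not from the patch): a VRNDSCALE immediate can be
// compressed to a VROUND immediate only when the scale field (bits 7:4) is
// zero, i.e. the value is fully described by bits 3:0.
static bool isVRoundCompatibleImm(int64_t ImmVal) {
  return (ImmVal & 0xf) == ImmVal;
}
```

For example, the $11 (truncate) and $7 immediates used throughout the updated tests pass this check and get compressed, while the $88 (0x58) immediate in the avx512vl test has a non-zero scale field and stays as vrndscaleps.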
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86EvexToVex.cpp | 29 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-schedule.ll | 8 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 6 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-scalar.ll | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 2 |
| -rw-r--r-- | llvm/test/CodeGen/X86/rounding-ops.ll | 20 |
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll | 10 |
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41-schedule.ll | 16 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_floor.ll | 40 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_ss_load_fold.ll | 33 |
| -rw-r--r-- | llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp | 20 |
12 files changed, 112 insertions, 80 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp
index 6dd4631a484..c9d936906f4 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -164,7 +164,7 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
 }
 
 // Do any custom cleanup needed to finalize the conversion.
-static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
+static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
   (void)NewOpc;
   unsigned Opc = MI.getOpcode();
   switch (Opc) {
@@ -197,7 +197,31 @@ static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
     Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
     break;
   }
+  case X86::VRNDSCALEPDZ128rri:
+  case X86::VRNDSCALEPDZ128rmi:
+  case X86::VRNDSCALEPSZ128rri:
+  case X86::VRNDSCALEPSZ128rmi:
+  case X86::VRNDSCALEPDZ256rri:
+  case X86::VRNDSCALEPDZ256rmi:
+  case X86::VRNDSCALEPSZ256rri:
+  case X86::VRNDSCALEPSZ256rmi:
+  case X86::VRNDSCALESDr:
+  case X86::VRNDSCALESDm:
+  case X86::VRNDSCALESSr:
+  case X86::VRNDSCALESSm:
+  case X86::VRNDSCALESDr_Int:
+  case X86::VRNDSCALESDm_Int:
+  case X86::VRNDSCALESSr_Int:
+  case X86::VRNDSCALESSm_Int:
+    const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+    int64_t ImmVal = Imm.getImm();
+    // Ensure that only bits 3:0 of the immediate are used.
+    if ((ImmVal & 0xf) != ImmVal)
+      return false;
+    break;
   }
+
+  return true;
 }
 
@@ -260,7 +284,8 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
   if (usesExtendedRegister(MI))
     return false;
 
-  performCustomAdjustments(MI, NewOpc);
+  if (!performCustomAdjustments(MI, NewOpc))
+    return false;
 
   MI.setDesc(TII->get(NewOpc));
   MI.setAsmPrinterFlag(AC_EVEX_2_VEX);
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index 748dd6804dd..41bf89e890e 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -599,7 +599,7 @@ define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
 ;
 ; AVX512VL-LABEL: test_x86_avx_round_pd_256:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vrndscalepd $7, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x07]
+; AVX512VL-NEXT: vroundpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x09,0xc0,0x07]
 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
   ret <4 x double> %res
@@ -615,7 +615,7 @@ define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
 ;
 ; AVX512VL-LABEL: test_x86_avx_round_ps_256:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vrndscaleps $7, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x07]
+; AVX512VL-NEXT: vroundps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x08,0xc0,0x07]
 ; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll
index 3f6f4eca59a..ad06954dbef 100644
--- a/llvm/test/CodeGen/X86/avx-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx-schedule.ll
@@ -4118,8 +4118,8 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
 ;
 ; SKX-LABEL: test_roundpd:
 ; SKX: # %bb.0:
-; SKX-NEXT: vrndscalepd $7, %ymm0, %ymm0 # sched: [8:0.67]
-; SKX-NEXT: vrndscalepd $7, (%rdi), %ymm1 # sched: [15:0.67]
+; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67]
+; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:0.67]
 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
@@ -4182,8 +4182,8 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
 ;
 ; SKX-LABEL: test_roundps:
 ; SKX: # %bb.0:
-; SKX-NEXT: vrndscaleps $7, %ymm0, %ymm0 # sched: [8:0.67]
-; SKX-NEXT: vrndscaleps $7, (%rdi), %ymm1 # sched: [15:0.67]
+; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67]
+; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:0.67]
 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index d76c26106c4..972fbc91602 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -26,7 +26,7 @@ declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double
 define <2 x double> @test_rndscale_sd(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_rndscale_sd:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: vrndscalesd $11, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vroundsd $11, %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 11, i32 4)
   ret <2 x double>%res
@@ -70,7 +70,7 @@ declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>,
 define <4 x float> @test_rndscale_ss(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: test_rndscale_ss:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: vrndscaless $11, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vroundss $11, %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
   ret <4 x float>%res
@@ -79,7 +79,7 @@ define <4 x float> @test_rndscale_ss(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @test_rndscale_ss_load(<4 x float> %a, <4 x float>* %bptr) {
 ; CHECK-LABEL: test_rndscale_ss_load:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: vrndscaless $11, (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %b = load <4 x float>, <4 x float>* %bptr
   %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
diff --git a/llvm/test/CodeGen/X86/avx512-scalar.ll b/llvm/test/CodeGen/X86/avx512-scalar.ll
index 48901570feb..6d168213b93 100644
--- a/llvm/test/CodeGen/X86/avx512-scalar.ll
+++ b/llvm/test/CodeGen/X86/avx512-scalar.ll
@@ -55,7 +55,7 @@ declare float @llvm.sqrt.f32(float %Val)
 define float @test_trunc(float %a) {
 ; AVX512-LABEL: test_trunc:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaless $11, %xmm0, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x0b]
+; AVX512-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0a,0xc0,0x0b]
 ; AVX512-NEXT: retq ## encoding: [0xc3]
 ;
 ; AVX-LABEL: test_trunc:
@@ -83,7 +83,7 @@ define double @test_sqrt(double %a) {
 define float @test_rint(float %a) {
 ; AVX512-LABEL: test_rint:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaless $4, %xmm0, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x04]
+; AVX512-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0a,0xc0,0x04]
 ; AVX512-NEXT: retq ## encoding: [0xc3]
 ;
 ; AVX-LABEL: test_rint:
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 78b1d21166f..752c0f73f59 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -2701,7 +2701,7 @@ define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vrndscaleps $88, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58]
-; CHECK-NEXT: vrndscaleps $4, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x08,0xc0,0x04]
+; CHECK-NEXT: vroundps $4, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x04]
 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
diff --git a/llvm/test/CodeGen/X86/rounding-ops.ll b/llvm/test/CodeGen/X86/rounding-ops.ll
index 6e84635da29..df16760aff5 100644
--- a/llvm/test/CodeGen/X86/rounding-ops.ll
+++ b/llvm/test/CodeGen/X86/rounding-ops.ll
@@ -16,7 +16,7 @@ define float @test1(float %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test1:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscaless $9, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call float @floorf(float %x) nounwind readnone
   ret float %call
@@ -37,7 +37,7 @@ define double @test2(double %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test2:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call double @floor(double %x) nounwind readnone
   ret double %call
@@ -58,7 +58,7 @@ define float @test3(float %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test3:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscaless $12, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call float @nearbyintf(float %x) nounwind readnone
   ret float %call
@@ -79,7 +79,7 @@ define double @test4(double %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test4:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscalesd $12, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call double @nearbyint(double %x) nounwind readnone
   ret double %call
@@ -100,7 +100,7 @@ define float @test5(float %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test5:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscaless $10, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call float @ceilf(float %x) nounwind readnone
   ret float %call
@@ -121,7 +121,7 @@ define double @test6(double %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test6:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call double @ceil(double %x) nounwind readnone
   ret double %call
@@ -142,7 +142,7 @@ define float @test7(float %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test7:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscaless $4, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call float @rintf(float %x) nounwind readnone
   ret float %call
@@ -163,7 +163,7 @@ define double @test8(double %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test8:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscalesd $4, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call double @rint(double %x) nounwind readnone
   ret double %call
@@ -184,7 +184,7 @@ define float @test9(float %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test9:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscaless $11, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call float @truncf(float %x) nounwind readnone
   ret float %call
@@ -205,7 +205,7 @@ define double @test10(double %x) nounwind {
 ;
 ; CHECK-AVX512-LABEL: test10:
 ; CHECK-AVX512: ## %bb.0:
-; CHECK-AVX512-NEXT: vrndscalesd $11, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT: retq
   %call = tail call double @trunc(double %x) nounwind readnone
   ret double %call
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
index 2c38904e4c7..f26044875f3 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -458,7 +458,7 @@ define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
 ;
 ; SKX-LABEL: test_x86_sse41_round_pd:
 ; SKX: ## %bb.0:
-; SKX-NEXT: vrndscalepd $7, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x07]
+; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x09,0xc0,0x07]
 ; SKX-NEXT: retl ## encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -479,7 +479,7 @@ define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
 ;
 ; SKX-LABEL: test_x86_sse41_round_ps:
 ; SKX: ## %bb.0:
-; SKX-NEXT: vrndscaleps $7, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x08,0xc0,0x07]
+; SKX-NEXT: vroundps $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x07]
 ; SKX-NEXT: retl ## encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
@@ -500,7 +500,7 @@ define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1)
 ;
 ; SKX-LABEL: test_x86_sse41_round_sd:
 ; SKX: ## %bb.0:
-; SKX-NEXT: vrndscalesd $7, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x0b,0xc1,0x07]
+; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0b,0xc1,0x07]
 ; SKX-NEXT: retl ## encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -524,7 +524,7 @@ define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>
 ; SKX-LABEL: test_x86_sse41_round_sd_load:
 ; SKX: ## %bb.0:
 ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SKX-NEXT: vrndscalesd $7, (%eax), %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x0b,0x00,0x07]
+; SKX-NEXT: vroundsd $7, (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
 ; SKX-NEXT: retl ## encoding: [0xc3]
   %a1b = load <2 x double>, <2 x double>* %a1
   %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
@@ -545,7 +545,7 @@ define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
 ;
 ; SKX-LABEL: test_x86_sse41_round_ss:
 ; SKX: ## %bb.0:
-; SKX-NEXT: vrndscaless $7, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc1,0x07]
+; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0a,0xc1,0x07]
 ; SKX-NEXT: retl ## encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll
index ed5660071e0..60fc0e8e514 100644
--- a/llvm/test/CodeGen/X86/sse41-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse41-schedule.ll
@@ -3024,8 +3024,8 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
 ;
 ; SKX-LABEL: test_roundpd:
 ; SKX: # %bb.0:
-; SKX-NEXT: vrndscalepd $7, %xmm0, %xmm0 # sched: [8:0.67]
-; SKX-NEXT: vrndscalepd $7, (%rdi), %xmm1 # sched: [14:0.67]
+; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:0.67]
 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
@@ -3096,8 +3096,8 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
 ;
 ; SKX-LABEL: test_roundps:
 ; SKX: # %bb.0:
-; SKX-NEXT: vrndscaleps $7, %xmm0, %xmm0 # sched: [8:0.67]
-; SKX-NEXT: vrndscaleps $7, (%rdi), %xmm1 # sched: [14:0.67]
+; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:0.67]
 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
@@ -3169,8 +3169,8 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
 ;
 ; SKX-LABEL: test_roundsd:
 ; SKX: # %bb.0:
-; SKX-NEXT: vrndscalesd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
-; SKX-NEXT: vrndscalesd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
+; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
 ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
@@ -3242,8 +3242,8 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
 ;
 ; SKX-LABEL: test_roundss:
 ; SKX: # %bb.0:
-; SKX-NEXT: vrndscaless $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
-; SKX-NEXT: vrndscaless $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
+; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
 ; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
diff --git a/llvm/test/CodeGen/X86/vec_floor.ll b/llvm/test/CodeGen/X86/vec_floor.ll
index d01c6f6ea90..4093357d516 100644
--- a/llvm/test/CodeGen/X86/vec_floor.ll
+++ b/llvm/test/CodeGen/X86/vec_floor.ll
@@ -16,7 +16,7 @@ define <2 x double> @floor_v2f64(<2 x double> %p) {
 ;
 ; AVX512-LABEL: floor_v2f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $9, %xmm0, %xmm0
+; AVX512-NEXT: vroundpd $9, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
   ret <2 x double> %t
@@ -36,7 +36,7 @@ define <4 x float> @floor_v4f32(<4 x float> %p) {
 ;
 ; AVX512-LABEL: floor_v4f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $9, %xmm0, %xmm0
+; AVX512-NEXT: vroundps $9, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
   ret <4 x float> %t
@@ -57,7 +57,7 @@ define <4 x double> @floor_v4f64(<4 x double> %p){
 ;
 ; AVX512-LABEL: floor_v4f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $9, %ymm0, %ymm0
+; AVX512-NEXT: vroundpd $9, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
   ret <4 x double> %t
@@ -78,7 +78,7 @@ define <8 x float> @floor_v8f32(<8 x float> %p) {
 ;
 ; AVX512-LABEL: floor_v8f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $9, %ymm0, %ymm0
+; AVX512-NEXT: vroundps $9, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
   ret <8 x float> %t
@@ -146,7 +146,7 @@ define <2 x double> @ceil_v2f64(<2 x double> %p) {
 ;
 ; AVX512-LABEL: ceil_v2f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $10, %xmm0, %xmm0
+; AVX512-NEXT: vroundpd $10, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
   ret <2 x double> %t
@@ -166,7 +166,7 @@ define <4 x float> @ceil_v4f32(<4 x float> %p) {
 ;
 ; AVX512-LABEL: ceil_v4f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $10, %xmm0, %xmm0
+; AVX512-NEXT: vroundps $10, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
   ret <4 x float> %t
@@ -187,7 +187,7 @@ define <4 x double> @ceil_v4f64(<4 x double> %p) {
 ;
 ; AVX512-LABEL: ceil_v4f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $10, %ymm0, %ymm0
+; AVX512-NEXT: vroundpd $10, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
   ret <4 x double> %t
@@ -208,7 +208,7 @@ define <8 x float> @ceil_v8f32(<8 x float> %p) {
 ;
 ; AVX512-LABEL: ceil_v8f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $10, %ymm0, %ymm0
+; AVX512-NEXT: vroundps $10, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
   ret <8 x float> %t
@@ -276,7 +276,7 @@ define <2 x double> @trunc_v2f64(<2 x double> %p) {
 ;
 ; AVX512-LABEL: trunc_v2f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $11, %xmm0, %xmm0
+; AVX512-NEXT: vroundpd $11, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   ret <2 x double> %t
@@ -296,7 +296,7 @@ define <4 x float> @trunc_v4f32(<4 x float> %p) {
 ;
 ; AVX512-LABEL: trunc_v4f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $11, %xmm0, %xmm0
+; AVX512-NEXT: vroundps $11, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   ret <4 x float> %t
@@ -317,7 +317,7 @@ define <4 x double> @trunc_v4f64(<4 x double> %p) {
 ;
 ; AVX512-LABEL: trunc_v4f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $11, %ymm0, %ymm0
+; AVX512-NEXT: vroundpd $11, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   ret <4 x double> %t
@@ -338,7 +338,7 @@ define <8 x float> @trunc_v8f32(<8 x float> %p) {
 ;
 ; AVX512-LABEL: trunc_v8f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $11, %ymm0, %ymm0
+; AVX512-NEXT: vroundps $11, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   ret <8 x float> %t
@@ -406,7 +406,7 @@ define <2 x double> @rint_v2f64(<2 x double> %p) {
 ;
 ; AVX512-LABEL: rint_v2f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $4, %xmm0, %xmm0
+; AVX512-NEXT: vroundpd $4, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
   ret <2 x double> %t
@@ -426,7 +426,7 @@ define <4 x float> @rint_v4f32(<4 x float> %p) {
 ;
 ; AVX512-LABEL: rint_v4f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $4, %xmm0, %xmm0
+; AVX512-NEXT: vroundps $4, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
   ret <4 x float> %t
@@ -447,7 +447,7 @@ define <4 x double> @rint_v4f64(<4 x double> %p) {
 ;
 ; AVX512-LABEL: rint_v4f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $4, %ymm0, %ymm0
+; AVX512-NEXT: vroundpd $4, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
   ret <4 x double> %t
@@ -468,7 +468,7 @@ define <8 x float> @rint_v8f32(<8 x float> %p) {
 ;
 ; AVX512-LABEL: rint_v8f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $4, %ymm0, %ymm0
+; AVX512-NEXT: vroundps $4, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
   ret <8 x float> %t
@@ -536,7 +536,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
 ;
 ; AVX512-LABEL: nearbyint_v2f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $12, %xmm0, %xmm0
+; AVX512-NEXT: vroundpd $12, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
   ret <2 x double> %t
@@ -556,7 +556,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
 ;
 ; AVX512-LABEL: nearbyint_v4f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $12, %xmm0, %xmm0
+; AVX512-NEXT: vroundps $12, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
   ret <4 x float> %t
@@ -577,7 +577,7 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %p) {
 ;
 ; AVX512-LABEL: nearbyint_v4f64:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscalepd $12, %ymm0, %ymm0
+; AVX512-NEXT: vroundpd $12, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
   ret <4 x double> %t
@@ -598,7 +598,7 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
 ;
 ; AVX512-LABEL: nearbyint_v8f32:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vrndscaleps $12, %ymm0, %ymm0
+; AVX512-NEXT: vroundps $12, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
   ret <8 x float> %t
diff --git a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
index 8377c01d7b4..d9ab5a1df7c 100644
--- a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -176,27 +176,16 @@ define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {
 ; X64-NEXT: roundss $4, (%rdi), %xmm0
 ; X64-NEXT: retq
 ;
-; X32_AVX1-LABEL: test3:
-; X32_AVX1: ## %bb.0:
-; X32_AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32_AVX1-NEXT: vroundss $4, (%eax), %xmm0, %xmm0
-; X32_AVX1-NEXT: retl
-;
-; X64_AVX1-LABEL: test3:
-; X64_AVX1: ## %bb.0:
-; X64_AVX1-NEXT: vroundss $4, (%rdi), %xmm0, %xmm0
-; X64_AVX1-NEXT: retq
-;
-; X32_AVX512-LABEL: test3:
-; X32_AVX512: ## %bb.0:
-; X32_AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32_AVX512-NEXT: vrndscaless $4, (%eax), %xmm0, %xmm0
-; X32_AVX512-NEXT: retl
+; X32_AVX-LABEL: test3:
+; X32_AVX: ## %bb.0:
+; X32_AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32_AVX-NEXT: vroundss $4, (%eax), %xmm0, %xmm0
+; X32_AVX-NEXT: retl
 ;
-; X64_AVX512-LABEL: test3:
-; X64_AVX512: ## %bb.0:
-; X64_AVX512-NEXT: vrndscaless $4, (%rdi), %xmm0, %xmm0
-; X64_AVX512-NEXT: retq
+; X64_AVX-LABEL: test3:
+; X64_AVX: ## %bb.0:
+; X64_AVX-NEXT: vroundss $4, (%rdi), %xmm0, %xmm0
+; X64_AVX-NEXT: retq
   %a = load float , float *%b
   %B = insertelement <4 x float> undef, float %a, i32 0
   %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)
@@ -254,7 +243,7 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
 ; X32_AVX512-NEXT: vmovaps %xmm0, (%esp) ## 16-byte Spill
 ; X32_AVX512-NEXT: calll _f
 ; X32_AVX512-NEXT: vmovaps (%esp), %xmm1 ## 16-byte Reload
-; X32_AVX512-NEXT: vrndscaless $4, %xmm1, %xmm0, %xmm0
+; X32_AVX512-NEXT: vroundss $4, %xmm1, %xmm0, %xmm0
 ; X32_AVX512-NEXT: addl $28, %esp
 ; X32_AVX512-NEXT: retl
 ;
@@ -265,7 +254,7 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
 ; X64_AVX512-NEXT: vmovaps %xmm0, (%rsp) ## 16-byte Spill
 ; X64_AVX512-NEXT: callq _f
 ; X64_AVX512-NEXT: vmovaps (%rsp), %xmm1 ## 16-byte Reload
-; X64_AVX512-NEXT: vrndscaless $4, %xmm1, %xmm0, %xmm0
+; X64_AVX512-NEXT: vroundss $4, %xmm1, %xmm0, %xmm0
 ; X64_AVX512-NEXT: addq $24, %rsp
 ; X64_AVX512-NEXT: retq
   %a = load float , float *%b
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 05f30facd54..d979531d076 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -67,7 +67,6 @@ private:
     "VPMULLQ",
     "VPSRAQ",
     "VDBPSADBW",
-    "VRNDSCALE",
     "VSCALEFPS"
   };
   // Instruction's name starts with one of the entries in the exception list
@@ -163,6 +162,25 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
     {"VSHUFI32X4Z256rri", "VPERM2I128rr", false},
     {"VSHUFI64X2Z256rmi", "VPERM2I128rm", false},
     {"VSHUFI64X2Z256rri", "VPERM2I128rr", false},
+
+    // These can be replaced if we verify the scale part of the immediate is
+    // zero.
+    {"VRNDSCALEPDZ128rri", "VROUNDPDr", true},
+    {"VRNDSCALEPDZ128rmi", "VROUNDPDm", true},
+    {"VRNDSCALEPSZ128rri", "VROUNDPSr", true},
+    {"VRNDSCALEPSZ128rmi", "VROUNDPSm", true},
+    {"VRNDSCALEPDZ256rri", "VROUNDYPDr", false},
+    {"VRNDSCALEPDZ256rmi", "VROUNDYPDm", false},
+    {"VRNDSCALEPSZ256rri", "VROUNDYPSr", false},
+    {"VRNDSCALEPSZ256rmi", "VROUNDYPSm", false},
+    {"VRNDSCALESDr", "VROUNDSDr", true},
+    {"VRNDSCALESDm", "VROUNDSDm", true},
+    {"VRNDSCALESSr", "VROUNDSSr", true},
+    {"VRNDSCALESSm", "VROUNDSSm", true},
+    {"VRNDSCALESDr_Int", "VROUNDSDr_Int", true},
+    {"VRNDSCALESDm_Int", "VROUNDSDm_Int", true},
+    {"VRNDSCALESSr_Int", "VROUNDSSr_Int", true},
+    {"VRNDSCALESSm_Int", "VROUNDSSm_Int", true},
   };
 
   // Print the manually added entries
```
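
For reference, the low nibble that survives the check is the standard SSE4.1/AVX rounding-control immediate, which is why the updated tests pair particular immediates with particular libm calls. A hedged sketch of that decoding follows (field layout per the usual ROUND*/VRNDSCALE immediate definition; the struct and function names are illustrative, not from LLVM):

```cpp
#include <cstdint>

// Illustrative decode of the VROUND/VRNDSCALE low-nibble immediate:
//   bits 1:0 - rounding mode (0 = nearest, 1 = down, 2 = up, 3 = truncate)
//   bit  2   - if set, ignore bits 1:0 and use the current MXCSR rounding mode
//   bit  3   - if set, suppress precision (inexact) exceptions
// This matches the immediates in the tests above: $9 for floor, $10 for ceil,
// $11 for trunc, $4 for rint, and $12 for nearbyint.
struct RoundImm {
  unsigned RoundingMode : 2;
  bool UseMXCSR : 1;
  bool SuppressPrecision : 1;
};

static RoundImm decodeRoundImm(uint8_t Imm) {
  return {static_cast<unsigned>(Imm & 0x3), (Imm & 0x4) != 0, (Imm & 0x8) != 0};
}
```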

