diff options
author | Craig Topper <craig.topper@intel.com> | 2018-07-10 22:02:23 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-07-10 22:02:23 +0000 |
commit | 860ab496d32406f19010e9876a6c8b25d466ea10 (patch) | |
tree | 57f26f613e628da597997d07fc91a0a95783153f /llvm | |
parent | a929fd7f25a26b52a003f86bc8b34a4b76136ee3 (diff) | |
download | bcm5719-llvm-860ab496d32406f19010e9876a6c8b25d466ea10.tar.gz bcm5719-llvm-860ab496d32406f19010e9876a6c8b25d466ea10.zip |
[X86] Teach X86InstrInfo::commuteInstructionImpl to use MOVSD/MOVSS for BLEND under optsize when the immediate allows it.
ISel currently emits movss/movsd a lot of the time, and an accidental double commute turns it into a blend.
Ideally we'd select blend directly in isel under optspeed and not rely on the double commute to create blend.
llvm-svn: 336731
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse41.ll | 8 |
3 files changed, 29 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 9ba408afe84..700e34ad0d0 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1547,9 +1547,29 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, } case X86::BLENDPDrri: case X86::BLENDPSrri: - case X86::PBLENDWrri: case X86::VBLENDPDrri: case X86::VBLENDPSrri: + // If we're optimizing for size, try to use MOVSD/MOVSS. + if (MI.getParent()->getParent()->getFunction().optForSize()) { + unsigned Mask, Opc; + switch (MI.getOpcode()) { + default: llvm_unreachable("Unreachable!"); + case X86::BLENDPDrri: Opc = X86::MOVSDrr; Mask = 0x03; break; + case X86::BLENDPSrri: Opc = X86::MOVSSrr; Mask = 0x0F; break; + case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break; + case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break; + } + if ((MI.getOperand(3).getImm() ^ Mask) == 1) { + auto &WorkingMI = cloneIfNew(MI); + WorkingMI.setDesc(get(Opc)); + WorkingMI.RemoveOperand(3); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, + /*NewMI=*/false, + OpIdx1, OpIdx2); + } + } + LLVM_FALLTHROUGH; + case X86::PBLENDWrri: case X86::VBLENDPDYrri: case X86::VBLENDPSYrri: case X86::VPBLENDDrri: diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll index f29b474ea0b..54529b177e5 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -820,8 +820,8 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x ; X86-AVX1: ## %bb.0: ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] ; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08] -; X86-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] -; X86-AVX1-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3] +; X86-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1] +; X86-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1] ; X86-AVX1-NEXT: retl ## encoding: [0xc3] ; ; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize: @@ -842,8 +842,8 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x ; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize: ; X64-AVX1: ## %bb.0: ; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f] -; X64-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] -; X64-AVX1-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3] +; X64-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1] +; X64-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1] ; X64-AVX1-NEXT: retq ## encoding: [0xc3] ; ; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize: diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll index 009eca7745b..935aa36de2c 100644 --- a/llvm/test/CodeGen/X86/sse41.ll +++ b/llvm/test/CodeGen/X86/sse41.ll @@ -394,7 +394,7 @@ define <4 x float> @insertps_or_blendps(<4 x float> %t1, float %t2) minsize noun ; X86-SSE: ## %bb.0: ; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04] ; X86-SSE-NEXT: ## xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: blendps $1, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x01] +; X86-SSE-NEXT: movss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x10,0xc1] ; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3] ; X86-SSE-NEXT: retl ## encoding: [0xc3] ; @@ -402,7 +402,7 @@ define <4 x float> @insertps_or_blendps(<4 x float> %t1, float %t2) minsize noun ; X86-AVX1: ## %bb.0: ; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04] ; X86-AVX1-NEXT: ## xmm1 = mem[0],zero,zero,zero -; X86-AVX1-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] +; X86-AVX1-NEXT: vmovss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x10,0xc1] ; X86-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3] ; X86-AVX1-NEXT: retl ## encoding: [0xc3] ; @@ -416,13 +416,13 @@ define <4 x float> @insertps_or_blendps(<4 x float> %t1, float %t2) minsize noun ; ; X64-SSE-LABEL: insertps_or_blendps: ; X64-SSE: ## %bb.0: -; X64-SSE-NEXT: blendps $1, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x01] +; X64-SSE-NEXT: movss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x10,0xc1] ; X64-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3] ; X64-SSE-NEXT: retq ## encoding: [0xc3] ; ; X64-AVX1-LABEL: insertps_or_blendps: ; X64-AVX1: ## %bb.0: -; X64-AVX1-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] +; X64-AVX1-NEXT: vmovss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x10,0xc1] ; X64-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3] ; X64-AVX1-NEXT: retq ## encoding: [0xc3] ; |