diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-5.ll | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining.ll | 13 |
7 files changed, 35 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 0571dd28de4..98779351224 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3709,6 +3709,22 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } + case X86::MOVHLPSrr: + case X86::UNPCKHPDrr: { + if (!Subtarget.hasSSE2()) + return nullptr; + + unsigned Opc = MI.getOpcode(); + switch (Opc) { + default: llvm_unreachable("Unreachable!"); + case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break; + case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break; + } + auto &WorkingMI = cloneIfNew(MI); + WorkingMI.setDesc(get(Opc)); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 870d8493675..15d6d6d440c 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1371,6 +1371,7 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>; + let isCommutable = 1 in def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", @@ -2641,7 +2642,8 @@ let Predicates = [UseSSE2] in { multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, PatFrag mem_frag, RegisterClass RC, X86MemOperand x86memop, string asm, - Domain d> { + Domain d, bit IsCommutable = 0> { + let isCommutable = IsCommutable in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, @@ -2689,7 +2691,7 @@ let Constraints = "$src1 = $dst" in { SSEPackedSingle>, PS; defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, PD; + SSEPackedDouble, 1>, PD; defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", SSEPackedSingle>, PS; diff --git a/llvm/test/CodeGen/X86/sse2.ll b/llvm/test/CodeGen/X86/sse2.ll index 85e57e0dbdd..37a003fb240 100644 --- a/llvm/test/CodeGen/X86/sse2.ll +++ b/llvm/test/CodeGen/X86/sse2.ll @@ -182,10 +182,10 @@ define void @test12() nounwind { ; CHECK-NEXT: movapd 0, %xmm0 ; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] ; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] -; CHECK-NEXT: xorpd %xmm2, %xmm2 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1] -; CHECK-NEXT: addps %xmm1, %xmm0 -; CHECK-NEXT: movaps %xmm0, 0 +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; CHECK-NEXT: addps %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm2, 0 ; CHECK-NEXT: retl %tmp1 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=2] %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1] diff --git a/llvm/test/CodeGen/X86/vec_insert-5.ll b/llvm/test/CodeGen/X86/vec_insert-5.ll index 67875b3ef23..a37c377e890 100644 --- a/llvm/test/CodeGen/X86/vec_insert-5.ll +++ b/llvm/test/CodeGen/X86/vec_insert-5.ll @@ -58,16 +58,14 @@ define <4 x float> @t3(<4 x float>* %P) nounwind { ; X32-LABEL: t3: ; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movapd (%eax), %xmm0 -; X32-NEXT: xorpd %xmm1, %xmm1 -; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; X32-NEXT: xorps %xmm0, %xmm0 +; X32-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; X32-NEXT: retl ; ; X64-LABEL: t3: ; X64: # BB#0: -; X64-NEXT: movapd (%rdi), %xmm0 -; X64-NEXT: xorpd %xmm1, %xmm1 -; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; X64-NEXT: retq %tmp1 = load <4 x float>, <4 x float>* %P %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 > diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll index 0d50205aa4a..0a94f31be66 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -154,7 +154,7 @@ define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) { define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_11: ; SSE: # BB#0: -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_11: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll index a1c5a97b4de..604de26ab76 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -319,8 +319,7 @@ define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) { define <4 x float> @shuffle_v4f32_6723(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: shuffle_v4f32_6723: ; SSE: # BB#0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4f32_6723: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index b0505192fe8..6170eda1971 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -1406,8 +1406,7 @@ define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) { define <4 x float> @combine_test4(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test4: ; SSE: # BB#0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test4: @@ -2326,8 +2325,7 @@ define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) { define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_undef_input_test4: ; SSE: # BB#0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_undef_input_test4: @@ -2432,7 +2430,7 @@ define <4 x float> @combine_undef_input_test8(<4 x float> %a) { define <4 x float> @combine_undef_input_test9(<4 x float> %a) { ; SSE-LABEL: combine_undef_input_test9: ; SSE: # BB#0: -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_undef_input_test9: @@ -2511,8 +2509,7 @@ define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) { define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_undef_input_test14: ; SSE: # BB#0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_undef_input_test14: @@ -2623,7 +2620,7 @@ define <4 x float> @combine_undef_input_test18(<4 x float> %a) { define <4 x float> @combine_undef_input_test19(<4 x float> %a) { ; SSE-LABEL: combine_undef_input_test19: ; SSE: # BB#0: -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_undef_input_test19: |

