summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp16
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td6
-rw-r--r--llvm/test/CodeGen/X86/sse2.ll8
-rw-r--r--llvm/test/CodeGen/X86/vec_insert-5.ll10
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll2
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll3
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining.ll13
7 files changed, 35 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 0571dd28de4..98779351224 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3709,6 +3709,22 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::MOVHLPSrr:
+ case X86::UNPCKHPDrr: {
+ if (!Subtarget.hasSSE2())
+ return nullptr;
+
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break;
+ case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break;
+ }
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 870d8493675..15d6d6d440c 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1371,6 +1371,7 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
+ let isCommutable = 1 in
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
@@ -2641,7 +2642,8 @@ let Predicates = [UseSSE2] in {
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
PatFrag mem_frag, RegisterClass RC,
X86MemOperand x86memop, string asm,
- Domain d> {
+ Domain d, bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
def rr : PI<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
@@ -2689,7 +2691,7 @@ let Constraints = "$src1 = $dst" in {
SSEPackedSingle>, PS;
defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
- SSEPackedDouble>, PD;
+ SSEPackedDouble, 1>, PD;
defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
SSEPackedSingle>, PS;
diff --git a/llvm/test/CodeGen/X86/sse2.ll b/llvm/test/CodeGen/X86/sse2.ll
index 85e57e0dbdd..37a003fb240 100644
--- a/llvm/test/CodeGen/X86/sse2.ll
+++ b/llvm/test/CodeGen/X86/sse2.ll
@@ -182,10 +182,10 @@ define void @test12() nounwind {
; CHECK-NEXT: movapd 0, %xmm0
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; CHECK-NEXT: xorpd %xmm2, %xmm2
-; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
-; CHECK-NEXT: addps %xmm1, %xmm0
-; CHECK-NEXT: movaps %xmm0, 0
+; CHECK-NEXT: xorps %xmm2, %xmm2
+; CHECK-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; CHECK-NEXT: addps %xmm1, %xmm2
+; CHECK-NEXT: movaps %xmm2, 0
; CHECK-NEXT: retl
%tmp1 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=2]
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/vec_insert-5.ll b/llvm/test/CodeGen/X86/vec_insert-5.ll
index 67875b3ef23..a37c377e890 100644
--- a/llvm/test/CodeGen/X86/vec_insert-5.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-5.ll
@@ -58,16 +58,14 @@ define <4 x float> @t3(<4 x float>* %P) nounwind {
; X32-LABEL: t3:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movapd (%eax), %xmm0
-; X32-NEXT: xorpd %xmm1, %xmm1
-; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X32-NEXT: retl
;
; X64-LABEL: t3:
; X64: # BB#0:
-; X64-NEXT: movapd (%rdi), %xmm0
-; X64-NEXT: xorpd %xmm1, %xmm1
-; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X64-NEXT: retq
%tmp1 = load <4 x float>, <4 x float>* %P
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 0d50205aa4a..0a94f31be66 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -154,7 +154,7 @@ define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE: # BB#0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_11:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index a1c5a97b4de..604de26ab76 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -319,8 +319,7 @@ define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
define <4 x float> @shuffle_v4f32_6723(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_6723:
; SSE: # BB#0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_6723:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index b0505192fe8..6170eda1971 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -1406,8 +1406,7 @@ define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test4:
; SSE: # BB#0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test4:
@@ -2326,8 +2325,7 @@ define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test4:
; SSE: # BB#0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test4:
@@ -2432,7 +2430,7 @@ define <4 x float> @combine_undef_input_test8(<4 x float> %a) {
define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test9:
; SSE: # BB#0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test9:
@@ -2511,8 +2509,7 @@ define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test14:
; SSE: # BB#0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test14:
@@ -2623,7 +2620,7 @@ define <4 x float> @combine_undef_input_test18(<4 x float> %a) {
define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test19:
; SSE: # BB#0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test19:
OpenPOWER on IntegriCloud