summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/insertps-combine.ll
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-01-22 12:17:48 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-01-22 12:17:48 +0000
commit: 180fcff5a7bca0b7bc2be14a21066f1eda3a7964 (patch)
tree: e6e197e24a625b87a21f10404ffc84ce6e884c23 /llvm/test/CodeGen/X86/insertps-combine.ll
parent: cd26560e46c2a2a50dbc79775b9a8bba03990945 (diff)
downloadbcm5719-llvm-180fcff5a7bca0b7bc2be14a21066f1eda3a7964.tar.gz
bcm5719-llvm-180fcff5a7bca0b7bc2be14a21066f1eda3a7964.zip
[X86][SSE] Add selective commutation support for insertps (PR40340)
When we are inserting one "inline" element and zeroing two of the other elements, we can safely commute the insertps source inputs to improve memory folding.

Differential Revision: https://reviews.llvm.org/D56843
llvm-svn: 351807
Diffstat (limited to 'llvm/test/CodeGen/X86/insertps-combine.ll')
-rw-r--r-- llvm/test/CodeGen/X86/insertps-combine.ll 7
1 file changed, 2 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/X86/insertps-combine.ll b/llvm/test/CodeGen/X86/insertps-combine.ll
index 12773b61557..6bef76ee9df 100644
--- a/llvm/test/CodeGen/X86/insertps-combine.ll
+++ b/llvm/test/CodeGen/X86/insertps-combine.ll
@@ -302,15 +302,12 @@ define float @extract_lane_insertps_6123(<4 x float> %a0, <4 x float> *%p1) {
define <4 x float> @commute_load_insertps(<4 x float>, <4 x float>* nocapture readonly) {
; SSE-LABEL: commute_load_insertps:
; SSE: # %bb.0:
-; SSE-NEXT: movaps (%rdi), %xmm1
-; SSE-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[1],zero,xmm1[3]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1],zero,mem[0]
; SSE-NEXT: retq
;
; AVX-LABEL: commute_load_insertps:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps (%rdi), %xmm1
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[1],zero,xmm1[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[1],zero,mem[0]
; AVX-NEXT: retq
%3 = load <4 x float>, <4 x float>* %1
%4 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %3, <4 x float> %0, i8 85)
OpenPOWER on IntegriCloud