[X86] Don't fold memory operands into insertps in the generated folding tables.

insertps behaves differently in its two forms: the register form selects from an input register based on the immediate operand, while the memory form just loads the given address. We have custom code to change the immediate in cases where that's legal, so completely remove insertps from the generated tables. llvm-svn: 304540
author: Benjamin Kramer <benny.kra@googlemail.com> 2017-06-02 10:50:22 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> 2017-06-02 10:50:22 +0000
commit: 19092d783c9660c95ac5d532fcb2f45fb131731e (patch)
tree: a003144a0b5e38a5622164bde4305e7496cb38f8
parent: 6a0644b24bbb151dc3141594c7e27f727c1474a6 (diff)
download: bcm5719-llvm-19092d783c9660c95ac5d532fcb2f45fb131731e.tar.gz
bcm5719-llvm-19092d783c9660c95ac5d532fcb2f45fb131731e.zip
2 files changed, 19 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
index a00d47bb13e..f937d484ce0 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -1926,5 +1926,19 @@ define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
   ret <8 x float> %6
 }
 
+define <4 x float> @stack_nofold_insertps(<8 x float> %a0, <8 x float> %a1) {
+; Cannot fold this without changing the immediate.
+; CHECK-LABEL: stack_nofold_insertps
+; CHECK:       32-byte Spill
+; CHECK:       nop
+; CHECK:       32-byte Reload
+; CHECK:       vinsertps $179, {{%xmm., %xmm., %xmm.}}
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %v0 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v1 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v0, <4 x float> %v1, i8 179)
+  ret <4 x float> %res
+}
+
 attributes #0 = { "unsafe-fp-math"="false" }
 attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index b89cee2ce4b..34f5fbc6ea3 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -101,6 +101,11 @@ const char *const NoFoldSet[] = {
     "BTS16rr", "BTS32rr", "BTS64rr",
     "BTS16mr", "BTS32mr", "BTS64mr",
 
+    // insertps cannot be folded without adjusting the immediate. There's custom
+    // code to handle it in X86InstrInfo.cpp, ignore it here.
+    "INSERTPSrr", "INSERTPSrm",
+    "VINSERTPSrr", "VINSERTPSrm", "VINSERTPSZrr", "VINSERTPSZrm",
+
     // Memory folding is enabled only when optimizing for size by DAG
     // patterns only. (issue detailed in D28744 review)
     "VCVTSS2SDrm",            "VCVTSS2SDrr",
author	Benjamin Kramer <benny.kra@googlemail.com>	2017-06-02 10:50:22 +0000
committer	Benjamin Kramer <benny.kra@googlemail.com>	2017-06-02 10:50:22 +0000
commit	19092d783c9660c95ac5d532fcb2f45fb131731e (patch)
tree	a003144a0b5e38a5622164bde4305e7496cb38f8
parent	6a0644b24bbb151dc3141594c7e27f727c1474a6 (diff)
download	bcm5719-llvm-19092d783c9660c95ac5d532fcb2f45fb131731e.tar.gz bcm5719-llvm-19092d783c9660c95ac5d532fcb2f45fb131731e.zip