summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-05-24 10:03:11 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-05-24 10:03:11 +0000
commit95b8d9bbf852428fc738bb93c78ef7b00f39341f (patch)
treecfd2ae0884d113480973daa6a1b39041ad8baa68 /llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
parent980f7605156e76fe2310fccca1581e5860c1512e (diff)
downloadbcm5719-llvm-95b8d9bbf852428fc738bb93c78ef7b00f39341f.tar.gz
bcm5719-llvm-95b8d9bbf852428fc738bb93c78ef7b00f39341f.zip
[SelectionDAG] computeKnownBits - support constant pool values from target
This patch adds the overridable TargetLowering::getTargetConstantFromLoad function which allows targets to return any constant value loaded by a LoadSDNode node - only X86 makes use of this so far but everything should be in place for other targets. computeKnownBits then uses this function to improve codegen, notably vector code after legalization. A future commit will do the same for ComputeNumSignBits but computeKnownBits sees the bigger benefit. This required a couple of fixes: * SimplifyDemandedBits must early-out for getTargetConstantFromLoad cases to prevent infinite loops of constant regeneration (similar to what we already do for BUILD_VECTOR). * Fix a DAGCombiner::visitTRUNCATE issue as we had trunc(shl(v8i32),v8i16) <-> shl(trunc(v8i16),v8i32) infinite loops after legalization on AVX512 targets. Differential Revision: https://reviews.llvm.org/D61887 llvm-svn: 361620
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll')
-rw-r--r--llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll24
1 files changed, 8 insertions, 16 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
index fc3bb0350af..546f723e68b 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -339,9 +339,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; SSE2-LABEL: var_shift_v8i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: psllw $12, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psraw $15, %xmm0
@@ -505,9 +503,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; X32-SSE-LABEL: var_shift_v8i8:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; X32-SSE-NEXT: pand %xmm0, %xmm2
-; X32-SSE-NEXT: pand %xmm0, %xmm1
+; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
; X32-SSE-NEXT: psllw $12, %xmm1
; X32-SSE-NEXT: movdqa %xmm1, %xmm0
; X32-SSE-NEXT: psraw $15, %xmm0
@@ -1122,11 +1118,9 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v8i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-NEXT: psllw $12, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psraw $15, %xmm0
@@ -1287,11 +1281,9 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; X32-SSE-LABEL: splatvar_shift_v8i8:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; X32-SSE-NEXT: pand %xmm0, %xmm2
-; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT: pand %xmm0, %xmm1
+; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
+; X32-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; X32-SSE-NEXT: psllw $12, %xmm1
; X32-SSE-NEXT: movdqa %xmm1, %xmm0
; X32-SSE-NEXT: psraw $15, %xmm0
OpenPOWER on IntegriCloud