From 72b05aa59c0268f3404af9c3e61c706d1661fd03 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 11 Dec 2014 20:44:59 +0000 Subject: [InstCombine][X86] Improved folding of calls to Intrinsic::x86_sse4a_insertqi. This patch teaches the instruction combiner how to fold a call to 'insertqi' when the 'length' field (3rd operand) is set to zero, and when the sum of the 'length' field and the 'bit index' (4th operand) is greater than 64. From the AMD64 Architecture Programmer's Manual: 1. If the sum of the bit index + length field is greater than 64, then the results are undefined; 2. A value of zero in the field length is defined as a length of 64. This patch improves the existing combining logic for the 'insertqi' intrinsic by adding extra checks to address both points 1 and 2. Differential Revision: http://reviews.llvm.org/D6583 llvm-svn: 224054 --- .../Transforms/InstCombine/vec_demanded_elts.ll | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'llvm/test') diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll index 41d2b292eef..00a029aeab7 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -303,6 +303,33 @@ define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) { ret <2 x i64> %2 } +; CHECK: define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> %i + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0) + ret <2 x i64> %1 +} + +; CHECK: define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> undef + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16) + ret <2 x i64> %1 +} + +; CHECK: define <2 x i64> 
@testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> undef + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32) + ret <2 x i64> %1 +} + +; CHECK: define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> undef + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16) + ret <2 x i64> %1 +} ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind -- cgit v1.2.3