diff options
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 17 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/vec_demanded_elts.ll | 27 | 
2 files changed, 43 insertions, 1 deletion
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 8a0d91dfe51..33af7ee0eff 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -733,7 +733,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // TODO: eventually we should lower this intrinsic to IR      if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {        if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) { -        if (CIWidth->equalsInt(64) && CIStart->isZero()) { +        unsigned Index = CIStart->getZExtValue(); +        // From AMD documentation: "a value of zero in the field length is +        // defined as length of 64". +        unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue(); + +        // From AMD documentation: "If the sum of the bit index + length field +        // is greater than 64, the results are undefined". + +        // Note that both field index and field length are 8-bit quantities. +        // Since variables 'Index' and 'Length' are unsigned values +        // obtained from zero-extending field index and field length +        // respectively, their sum should never wrap around. 
+        if ((Index + Length) > 64) +          return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + +        if (Length == 64 && Index == 0) {            Value *Vec = II->getArgOperand(1);            Value *Undef = UndefValue::get(Vec->getType());            const uint32_t Mask[] = { 0, 2 }; diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll index 41d2b292eef..00a029aeab7 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -303,6 +303,33 @@ define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {    ret <2 x i64> %2  } +; CHECK: define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> %i +  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0) +  ret <2 x i64> %1 +} + +; CHECK: define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> undef +  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16) +  ret <2 x i64> %1 +} + +; CHECK: define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> undef +  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32) +  ret <2 x i64> %1 +} + +; CHECK: define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> undef +  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16) +  ret <2 x i64> %1 +}  ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi  declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x 
i64>, i8, i8) nounwind  | 

