summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2019-04-25 13:51:57 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2019-04-25 13:51:57 +0000
commit 48a3b545721aeb94f054b4aa044abfd9ac59bada (patch)
tree 4029cbf32e3186f0750052d7d18b8c496b956583
parent 0fc09d0d25d4f7d1068a0940ca1f03aad73ab7ea (diff)
download bcm5719-llvm-48a3b545721aeb94f054b4aa044abfd9ac59bada.tar.gz
bcm5719-llvm-48a3b545721aeb94f054b4aa044abfd9ac59bada.zip
[InstCombine][X86] Tweak generic expansion of PACKSS/PACKUS to shuffle then truncate. NFCI.
This has no effect on constant folding but will be useful when we expand non-saturating PACKSS/PACKUS intrinsics.

llvm-svn: 359191
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 11
1 file changed, 4 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d8bc459b901..221ad5838a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -593,12 +593,7 @@ static Value *simplifyX86pack(IntrinsicInst &II,
Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
- // Truncate clamped args to dst size.
- auto *TruncTy = VectorType::get(ResTy->getScalarType(), NumSrcElts);
- Arg0 = Builder.CreateTrunc(Arg0, TruncTy);
- Arg1 = Builder.CreateTrunc(Arg1, TruncTy);
-
- // Shuffle args together at the lane level.
+ // Shuffle clamped args together at the lane level.
SmallVector<unsigned, 32> PackMask;
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
@@ -606,8 +601,10 @@ static Value *simplifyX86pack(IntrinsicInst &II,
for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
}
+ auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
- return Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
+ // Truncate to dst size.
+ return Builder.CreateTrunc(Shuffle, ResTy);
}
// Replace X86-specific intrinsics with generic floor-ceil where applicable.
OpenPOWER on IntegriCloud