summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 28
-rw-r--r-- llvm/test/CodeGen/X86/2012-07-15-broadcastfold.ll | 1
2 files changed, 12 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 947cd01308b..a87cc804a44 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7850,27 +7850,21 @@ static SDValue lowerVectorShuffleAsBroadcast(MVT VT, SDLoc DL, SDValue V,
"a sorted mask where the broadcast "
"comes from V1.");
- // Check if this is a broadcast of a scalar load -- those are more widely
- // supported than broadcasting in-register values.
+ // Check if this is a broadcast of a scalar. We special case lowering for
+ // scalars so that we can more effectively fold with loads.
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
- SDValue BroadcastV = V.getOperand(BroadcastIdx);
- if (ISD::isNON_EXTLoad(BroadcastV.getNode())) {
- // We can directly broadcast from memory.
- return DAG.getNode(X86ISD::VBROADCAST, DL, VT, BroadcastV);
- }
- }
-
- // We can't broadcast from a register w/o AVX2.
- if (!Subtarget->hasAVX2())
- return SDValue();
+ V = V.getOperand(BroadcastIdx);
- // Check if this is a broadcast of a BUILD_VECTOR which we can always handle,
- // or is a broadcast of the zero element.
- if (V.getOpcode() == ISD::BUILD_VECTOR)
- V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, V.getOperand(BroadcastIdx));
- else if (BroadcastIdx != 0)
+ // If the scalar isn't a load we can't broadcast from it in AVX1, only with
+ // AVX2.
+ if (!Subtarget->hasAVX2() && !ISD::isNON_EXTLoad(V.getNode()))
+ return SDValue();
+ } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
+ // We can't broadcast from a vector register w/o AVX2, and we can only
+ // broadcast from the zero-element of a vector register.
return SDValue();
+ }
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, V);
}
diff --git a/llvm/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/llvm/test/CodeGen/X86/2012-07-15-broadcastfold.ll
index 1c39c747cdc..519c7cac736 100644
--- a/llvm/test/CodeGen/X86/2012-07-15-broadcastfold.ll
+++ b/llvm/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s
declare x86_fastcallcc i64 @barrier()
OpenPOWER on IntegriCloud