diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2019-04-03 22:40:35 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2019-04-03 22:40:35 +0000 |
| commit | c9a012e4eab7f2e9aa8701451b0bf7c6b80222ac (patch) | |
| tree | 27b0d0adb5db432c6fd53f3fb3e8aa57d71e4d2f /llvm | |
| parent | 144a43a2fdb863020ffbd06601619fead9a3a3fc (diff) | |
| download | bcm5719-llvm-c9a012e4eab7f2e9aa8701451b0bf7c6b80222ac.tar.gz bcm5719-llvm-c9a012e4eab7f2e9aa8701451b0bf7c6b80222ac.zip | |
[x86] fold shuffles of h-ops that have an undef operand
If an operand is undef, we can assume it's the same as the
other operand.
llvm-svn: 357644
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/haddsub-shuf.ll | 1 |
2 files changed, 2 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 84009979ea1..a05fd557ca2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32880,8 +32880,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
   // lanes of each operand as:
   // v4X32: A[0] + A[1] , A[2] + A[3] , B[0] + B[1] , B[2] + B[3]
   // ...similarly for v2f64 and v8i16.
-  // TODO: Handle UNDEF operands.
-  if (HOp.getOperand(0) != HOp.getOperand(1))
+  if (!HOp.getOperand(0).isUndef() && !HOp.getOperand(1).isUndef() &&
+      HOp.getOperand(0) != HOp.getOperand(1))
     return SDValue();
 
   // When the operands of a horizontal math op are identical, the low half of
diff --git a/llvm/test/CodeGen/X86/haddsub-shuf.ll b/llvm/test/CodeGen/X86/haddsub-shuf.ll
index e50d6af6e09..c37e9862ed7 100644
--- a/llvm/test/CodeGen/X86/haddsub-shuf.ll
+++ b/llvm/test/CodeGen/X86/haddsub-shuf.ll
@@ -381,7 +381,6 @@ define <4 x double> @hadd_v4f64_scalar_splat(<4 x double> %a) {
 ; AVX-LABEL: hadd_v4f64_scalar_splat:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vhaddpd %ymm0, %ymm0, %ymm0
-; AVX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX-NEXT: retq
   %a0 = extractelement <4 x double> %a, i32 0
   %a1 = extractelement <4 x double> %a, i32 1

