author    David Green <david.green@arm.com>    2019-04-10 18:00:41 +0000
committer David Green <david.green@arm.com>    2019-04-10 18:00:41 +0000
commit    0861c87b06c5573d919ec550bbbd2a1624d22ba0 (patch)
tree      99fec9561f6122cd38721bf5947e097a61ad7348
parent    5f6eb1817af8d525ee93ac8a1f0cf9c881b3ebd3 (diff)
Revert rL357745: [SelectionDAG] Compute known bits of CopyFromReg
Certain optimisations from ConstantHoisting and CGP rely on SelectionDAG not
seeing through to the constant in other blocks. Revert this patch while we
come up with a better way to handle that.

I will try to follow this up with some better tests.

llvm-svn: 358113
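For context, a minimal hypothetical IR sketch of the pattern in question (not part of this patch or its tests; the function and value names are made up): ConstantHoisting materialises an expensive constant once, typically as an identity bitcast, and later blocks reuse that SSA value. During instruction selection the value crosses the block boundary as a CopyFromReg, so if computeKnownBits can look through that copy it may rediscover the constant and fold it back into each user, undoing the hoisting.

; Hypothetical example, assuming a target where 12345678 is an
; expensive immediate that ConstantHoisting would hoist.
define i32 @hoisted_const_sketch(i32 %x, i1 %c) {
entry:
  ; ConstantHoisting-style materialisation: the constant is hidden
  ; behind an identity bitcast so it is built only once.
  %const = bitcast i32 12345678 to i32
  br i1 %c, label %use, label %done

use:
  ; %const reaches this block as a CopyFromReg during SelectionDAG
  ; lowering; computing its known bits across the block boundary
  ; would expose the constant to DAG combines in this block again.
  %a = and i32 %x, %const
  ret i32 %a

done:
  ret i32 0
}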
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp          | 20
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp                 |  6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll   | 10
-rw-r--r--  llvm/test/CodeGen/ARM/atomic-op.ll                      |  8
-rw-r--r--  llvm/test/CodeGen/PowerPC/pr35688.ll                    | 16
-rw-r--r--  llvm/test/CodeGen/SystemZ/subregliveness-04.ll          |  2
-rw-r--r--  llvm/test/CodeGen/X86/fold-tied-op.ll                   |  2
-rw-r--r--  llvm/test/CodeGen/X86/pr28444.ll                        |  5
8 files changed, 27 insertions, 42 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index caa3f14b8a1..c017d6d6eae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -3208,25 +3207,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.One &= Known2.One;
break;
}
- case ISD::CopyFromReg: {
- auto R = cast<RegisterSDNode>(Op.getOperand(1));
- const unsigned Reg = R->getReg();
-
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- if (!TRI->isVirtualRegister(Reg))
- break;
-
- const MachineRegisterInfo *MRI = &MF->getRegInfo();
- if (!MRI->hasOneDef(Reg))
- break;
-
- const FunctionLoweringInfo::LiveOutInfo *LOI = FLI->GetLiveOutRegInfo(Reg);
- if (!LOI || LOI->Known.getBitWidth() != BitWidth)
- break;
-
- Known = LOI->Known;
- break;
- }
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f5db87090e..190d5708324 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19595,10 +19595,10 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
- // If the operand types disagree, extend or truncate the shift amount to match.
- // Since BT ignores high bits (like shifts) we can use anyextend for the extension.
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
if (Src.getValueType() != BitNo.getValueType())
- BitNo = DAG.getAnyExtOrTrunc(BitNo, dl, Src.getValueType());
+ BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
dl, MVT::i8);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
index 4e85ca0cc52..1c450e7c0b9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
@@ -110,8 +110,8 @@ main_body:
;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb:
;CHECK-NOT: s_waitcnt;
-;CHECK-NOT: v_or_b32
-;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
+;CHECK: v_or_b32
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
main_body:
%tmp = shl i32 %index, 4
@@ -127,8 +127,10 @@ bb1: ; preds = %main_body
;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
;CHECK-NOT: s_waitcnt;
-;CHECK-NOT: v_or_b32
-;CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
+;CHECK: v_or_b32
+;CHECK: v_or_b32
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
main_body:
%tmp = shl i32 %index, 4
diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll
index 5f206b6189d..8ab20267a18 100644
--- a/llvm/test/CodeGen/ARM/atomic-op.ll
+++ b/llvm/test/CodeGen/ARM/atomic-op.ll
@@ -183,11 +183,11 @@ entry:
ret void
}
-define void @func2(i16 %int_val) nounwind {
+define void @func2() nounwind {
entry:
%val = alloca i16
%old = alloca i16
- store i16 %int_val, i16* %val
+ store i16 31, i16* %val
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
@@ -197,7 +197,7 @@ entry:
; CHECK-BAREMETAL-NOT: __sync
%0 = atomicrmw umin i16* %val, i16 16 monotonic
store i16 %0, i16* %old
- %uneg = sub i16 0, 2
+ %uneg = sub i16 0, 1
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
@@ -249,7 +249,7 @@ entry:
; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
; CHECK-BAREMETAL: cmp
; CHECK-BAREMETAL-NOT: __sync
- %uneg = sub i8 0, 2
+ %uneg = sub i8 0, 1
%1 = atomicrmw umin i8* %val, i8 %uneg monotonic
store i8 %1, i8* %old
; CHECK: ldrex
diff --git a/llvm/test/CodeGen/PowerPC/pr35688.ll b/llvm/test/CodeGen/PowerPC/pr35688.ll
index 7573e871ef4..098573ec1b0 100644
--- a/llvm/test/CodeGen/PowerPC/pr35688.ll
+++ b/llvm/test/CodeGen/PowerPC/pr35688.ll
@@ -6,14 +6,16 @@
; Function Attrs: nounwind
define void @ec_GFp_nistp256_points_mul() {
; CHECK-LABEL: ec_GFp_nistp256_points_mul:
-; CHECK: ld 4, 0(3)
-; CHECK: li 3, 0
-; CHECK: subfic 5, 4, 0
-; CHECK: subfze 5, 3
+; CHECK: ld 5, 0(3)
+; CHECK: li 3, 127
+; CHECK: li 4, 0
+; CHECK: subfic 6, 5, 0
+; CHECK: subfze 6, 4
+; CHECK: sradi 7, 6, 63
+; CHECK: srad 6, 6, 3
+; CHECK: subfc 5, 5, 7
+; CHECK: subfe 5, 4, 6
; CHECK: sradi 5, 5, 63
-; CHECK: subfc 4, 4, 5
-; CHECK: subfe 4, 3, 5
-; CHECK: sradi 4, 4, 63
; With MemorySSA, everything is taken out of the loop by licm.
; Loads and stores to undef are treated as non-aliasing.
diff --git a/llvm/test/CodeGen/SystemZ/subregliveness-04.ll b/llvm/test/CodeGen/SystemZ/subregliveness-04.ll
index cb9ae9bbd43..11ecc9bd9c7 100644
--- a/llvm/test/CodeGen/SystemZ/subregliveness-04.ll
+++ b/llvm/test/CodeGen/SystemZ/subregliveness-04.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp -systemz-subreg-liveness < %s | FileCheck %s
; Check for successful compilation.
-; CHECK: lhi {{%r[0-9]+}}, -5
+; CHECK: lhi %r0, -5
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
target triple = "s390x-ibm-linux"
diff --git a/llvm/test/CodeGen/X86/fold-tied-op.ll b/llvm/test/CodeGen/X86/fold-tied-op.ll
index 6fe1713a541..eb06eb75a4d 100644
--- a/llvm/test/CodeGen/X86/fold-tied-op.ll
+++ b/llvm/test/CodeGen/X86/fold-tied-op.ll
@@ -6,8 +6,8 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386--netbsd"
; CHECK-LABEL: fn1
-; CHECK: addl {{.*#+}} 4-byte Folded Reload
; CHECK: orl {{.*#+}} 4-byte Folded Reload
+; CHECK: addl {{.*#+}} 4-byte Folded Reload
; CHECK: xorl {{.*#+}} 4-byte Folded Reload
; CHECK: xorl {{.*#+}} 4-byte Folded Reload
; CHECK: retl
diff --git a/llvm/test/CodeGen/X86/pr28444.ll b/llvm/test/CodeGen/X86/pr28444.ll
index 4d7d08afa54..23383209e38 100644
--- a/llvm/test/CodeGen/X86/pr28444.ll
+++ b/llvm/test/CodeGen/X86/pr28444.ll
@@ -11,8 +11,9 @@
define void @extractelt_mismatch_vector_element_type(i32 %arg, i1 %x) {
; CHECK-LABEL: extractelt_mismatch_vector_element_type:
; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: movb $1, (%rax)
-; CHECK-NEXT: movb $1, (%rax)
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: movb %al, (%rax)
+; CHECK-NEXT: movb %al, (%rax)
; CHECK-NEXT: retq
bb:
%tmp = icmp ult i32 %arg, 0