summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorAmara Emerson <aemerson@apple.com>2019-04-15 22:34:08 +0000
committerAmara Emerson <aemerson@apple.com>2019-04-15 22:34:08 +0000
commit02a90ea73de5f11a476787b66246eb1f39fa9e4a (patch)
tree56e573ea5d76a0aedea84683be9fa8f3916461e5 /llvm
parentfda04268886028c0b0b7fd9546548714e4209d5e (diff)
downloadbcm5719-llvm-02a90ea73de5f11a476787b66246eb1f39fa9e4a.tar.gz
bcm5719-llvm-02a90ea73de5f11a476787b66246eb1f39fa9e4a.zip
[AArch64][GlobalISel] Don't do extending loads combine for non-pow-2 types.
Since non-power-of-2 types are going to be split up into multiple loads anyway, skip the [SZ]EXTLOAD combine for them; this saves us trouble later in legalization. llvm-svn: 358458
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp5
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/non-pow-2-extload-combine.mir37
3 files changed, 43 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2eab5748980..4a4431643b0 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -194,6 +194,11 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
if (LoadValueTy.getSizeInBits() < 8)
return false;
+ // Non-power-of-2 types will very likely be legalized into multiple loads.
+ // Don't bother trying to match them into extending loads.
+ if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+ return false;
+
// Find the preferred type aside from the any-extends (unless it's the only
// one) and non-extending ops. We'll emit an extending load to that type and
// emit a variant of (extend (trunc X)) for the others according to the
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index 996b5c14142..a0c3af5c1b5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -54,7 +54,7 @@ false:
}
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(s32) = G_ZEXTLOAD %1:_(p0) :: (load 3 from `i24* undef`, align 1) (in function: odd_type_load)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s32) = G_LOAD %1:_(p0) :: (load 3 from `i24* undef`, align 1) (in function: odd_type_load)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type_load
; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type_load
define i32 @odd_type_load() {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/non-pow-2-extload-combine.mir b/llvm/test/CodeGen/AArch64/GlobalISel/non-pow-2-extload-combine.mir
new file mode 100644
index 00000000000..d9e4df8cf45
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/non-pow-2-extload-combine.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=aarch64 -run-pass=aarch64-prelegalizer-combiner %s -o - -verify-machineinstrs | FileCheck %s
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64"
+
+ define i32 @ld_zext_i24(i24* %ptr, i24* %ptr2) {
+ %load = load i24, i24* %ptr, align 1
+ %ext = zext i24 %load to i32
+ ret i32 %ext
+ }
+
+...
+---
+name: ld_zext_i24
+alignment: 2
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: ld_zext_i24
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[COPY]](p0) :: (load 3 from %ir.ptr, align 1)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24)
+ ; CHECK: $w0 = COPY [[ZEXT]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %2:_(s24) = G_LOAD %0(p0) :: (load 3 from %ir.ptr, align 1)
+ %3:_(s32) = G_ZEXT %2(s24)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
OpenPOWER on IntegriCloud