summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AArch64/AArch64.td 5
-rw-r--r-- llvm/lib/Target/AArch64/AArch64MacroFusion.cpp 33
-rw-r--r-- llvm/lib/Target/AArch64/AArch64Subtarget.h 2
-rw-r--r-- llvm/test/CodeGen/AArch64/misched-fusion-addr.ll 112
4 files changed, 152 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 7721e897060..7e510a1fbd6 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -120,6 +120,10 @@ def FeatureArithmeticCbzFusion : SubtargetFeature<
"arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
"CPU fuses arithmetic + cbz/cbnz operations">;
+def FeatureFuseAddress : SubtargetFeature<
+ "fuse-address", "HasFuseAddress", "true",
+ "CPU fuses address generation and memory operations">;
+
def FeatureFuseAES : SubtargetFeature<
"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
@@ -346,6 +350,7 @@ def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseLiterals,
FeatureLSLFast,
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index 6930c816b5a..2f58306f6c6 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -150,6 +150,39 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
SecondMI.getOperand(3).getImm() == 48);
}
+ if (ST.hasFuseAddress()) {
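+ // Fuse address generation (first opcode ADR, ADRP or INSTRUCTION_LIST_END) with a listed load/store; with ADR, operand 2 of the load/store must be immediate 0.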
+ if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+ FirstOpcode == AArch64::ADR ||
+ FirstOpcode == AArch64::ADRP) &&
+ ((SecondOpcode == AArch64::STRBBui ||
+ SecondOpcode == AArch64::STRBui ||
+ SecondOpcode == AArch64::STRDui ||
+ SecondOpcode == AArch64::STRHHui ||
+ SecondOpcode == AArch64::STRHui ||
+ SecondOpcode == AArch64::STRQui ||
+ SecondOpcode == AArch64::STRSui ||
+ SecondOpcode == AArch64::STRWui ||
+ SecondOpcode == AArch64::STRXui ||
+ SecondOpcode == AArch64::LDRBBui ||
+ SecondOpcode == AArch64::LDRBui ||
+ SecondOpcode == AArch64::LDRDui ||
+ SecondOpcode == AArch64::LDRHHui ||
+ SecondOpcode == AArch64::LDRHui ||
+ SecondOpcode == AArch64::LDRQui ||
+ SecondOpcode == AArch64::LDRSBWui ||
+ SecondOpcode == AArch64::LDRSBXui ||
+ SecondOpcode == AArch64::LDRSHWui ||
+ SecondOpcode == AArch64::LDRSHXui ||
+ SecondOpcode == AArch64::LDRSWui ||
+ SecondOpcode == AArch64::LDRSui ||
+ SecondOpcode == AArch64::LDRWui ||
+ SecondOpcode == AArch64::LDRXui) &&
+ (FirstOpcode != AArch64::ADR ||
+ SecondMI.getOperand(2).getImm() == 0)))
+ return true;
+ }
+
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 9b96bfa7f34..d06f8a1ae57 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -111,6 +111,7 @@ protected:
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
+ bool HasFuseAddress = false;
bool HasFuseAES = false;
bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
@@ -236,6 +237,7 @@ public:
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
+ bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll
new file mode 100644
index 00000000000..9dfe9d3b602
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll
@@ -0,0 +1,112 @@
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-address | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
+
+target triple = "aarch64-unknown"
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+@var_128bit = global i128 0
+@var_half = global half 0.0
+@var_float = global float 0.0
+@var_double = global double 0.0
+@var_double2 = global <2 x double> <double 0.0, double 0.0>
+
+define void @ldst_8bit() {
+ %val8 = load volatile i8, i8* @var_8bit
+ %ext = zext i8 %val8 to i64
+ %add = add i64 %ext, 1
+ %val16 = trunc i64 %add to i16
+ store volatile i16 %val16, i16* @var_16bit
+ ret void
+
+; CHECK-LABEL: ldst_8bit:
+; CHECK: adrp [[RB:x[0-9]+]], var_8bit
+; CHECK-NEXT: ldrb {{w[0-9]+}}, {{\[}}[[RB]], {{#?}}:lo12:var_8bit{{\]}}
+; CHECK: adrp [[RH:x[0-9]+]], var_16bit
+; CHECK-NEXT: strh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}}
+}
+
+define void @ldst_16bit() {
+ %val16 = load volatile i16, i16* @var_16bit
+ %ext = zext i16 %val16 to i64
+ %add = add i64 %ext, 1
+ %val32 = trunc i64 %add to i32
+ store volatile i32 %val32, i32* @var_32bit
+ ret void
+
+; CHECK-LABEL: ldst_16bit:
+; CHECK: adrp [[RH:x[0-9]+]], var_16bit
+; CHECK-NEXT: ldrh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}}
+; CHECK: adrp [[RW:x[0-9]+]], var_32bit
+; CHECK-NEXT: str {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}}
+}
+
+define void @ldst_32bit() {
+ %val32 = load volatile i32, i32* @var_32bit
+ %ext = zext i32 %val32 to i64
+ %val64 = add i64 %ext, 1
+ store volatile i64 %val64, i64* @var_64bit
+ ret void
+
+; CHECK-LABEL: ldst_32bit:
+; CHECK: adrp [[RW:x[0-9]+]], var_32bit
+; CHECK-NEXT: ldr {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}}
+; CHECK: adrp [[RL:x[0-9]+]], var_64bit
+; CHECK-NEXT: str {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}}
+}
+
+define void @ldst_64bit() {
+ %val64 = load volatile i64, i64* @var_64bit
+ %ext = zext i64 %val64 to i128
+ %val128 = add i128 %ext, 1
+ store volatile i128 %val128, i128* @var_128bit
+ ret void
+
+; CHECK-LABEL: ldst_64bit:
+; CHECK: adrp [[RL:x[0-9]+]], var_64bit
+; CHECK-NEXT: ldr {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}}
+; CHECK: adrp [[RQ:x[0-9]+]], var_128bit
+; CHECK-NEXT: add {{x[0-9]+}}, [[RQ]], {{#?}}:lo12:var_128bit
+}
+
+define void @ldst_half() {
+ %valh = load volatile half, half* @var_half
+ %valf = fpext half %valh to float
+ store volatile float %valf, float* @var_float
+ ret void
+
+; CHECK-LABEL: ldst_half:
+; CHECK: adrp [[RH:x[0-9]+]], var_half
+; CHECK-NEXT: ldr {{h[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_half{{\]}}
+; CHECK: adrp [[RF:x[0-9]+]], var_float
+; CHECK-NEXT: str {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}}
+}
+
+define void @ldst_float() {
+ %valf = load volatile float, float* @var_float
+ %vald = fpext float %valf to double
+ store volatile double %vald, double* @var_double
+ ret void
+
+; CHECK-LABEL: ldst_float:
+; CHECK: adrp [[RF:x[0-9]+]], var_float
+; CHECK-NEXT: ldr {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}}
+; CHECK: adrp [[RD:x[0-9]+]], var_double
+; CHECK-NEXT: str {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
+}
+
+define void @ldst_double() {
+ %vald = load volatile double, double* @var_double
+ %val = insertelement <2 x double> undef, double %vald, i32 0
+ %vald2 = insertelement <2 x double> %val, double %vald, i32 1
+ store volatile <2 x double> %vald2, <2 x double>* @var_double2
+ ret void
+
+; CHECK-LABEL: ldst_double:
+; CHECK: adrp [[RD:x[0-9]+]], var_double
+; CHECK-NEXT: add {{x[0-9]+}}, [[RD]], {{#?}}:lo12:var_double
+; CHECK: adrp [[RQ:x[0-9]+]], var_double2
+; CHECK-NEXT: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
+}
OpenPOWER on IntegriCloud