summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/ARM/ARM.td 4
-rw-r--r-- llvm/lib/Target/ARM/ARMMacroFusion.cpp 63
-rw-r--r-- llvm/lib/Target/ARM/ARMSubtarget.h 7
-rw-r--r-- llvm/test/CodeGen/ARM/misched-fusion-lit.ll 39
4 files changed, 94 insertions, 19 deletions
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 742b3551889..2e62a079041 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -141,6 +141,10 @@ def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
+// Fast execution of bottom and top halves of literal generation
+def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
+ "CPU fuses literal generation operations">;
+
// The way of reading thread pointer
def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true",
"Reading thread pointer from register">;
diff --git a/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/llvm/lib/Target/ARM/ARMMacroFusion.cpp
index f2dc650a6f3..d11fe9d5c50 100644
--- a/llvm/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -19,6 +19,47 @@
namespace llvm {
+// Fuse AES crypto encoding or decoding.
+static bool isAESPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = SecondMI.getOpcode();
+
+ switch(SecondOpcode) {
+ // AES encode.
+ case ARM::AESMC :
+ return FirstOpcode == ARM::AESE ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ // AES decode.
+ case ARM::AESIMC:
+ return FirstOpcode == ARM::AESD ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ }
+
+ return false;
+}
+
+// Fuse literal generation.
+static bool isLiteralsPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = SecondMI.getOpcode();
+
+ // 32 bit immediate.
+ if ((FirstOpcode == ARM::INSTRUCTION_LIST_END ||
+ FirstOpcode == ARM::MOVi16) &&
+ SecondOpcode == ARM::MOVTi16)
+ return true;
+
+ return false;
+}
+
/// Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
@@ -28,24 +69,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const MachineInstr &SecondMI) {
const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
- // Assume wildcards for unspecified instrs.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- if (ST.hasFuseAES())
- // Fuse AES crypto operations.
- switch(SecondOpcode) {
- // AES encode.
- case ARM::AESMC :
- return FirstOpcode == ARM::AESE ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
- // AES decode.
- case ARM::AESIMC:
- return FirstOpcode == ARM::AESD ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
- }
+ if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
+ return true;
+ if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
+ return true;
return false;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 165077926c8..74aee9a8ed3 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -327,6 +327,10 @@ protected:
/// pairs faster.
bool HasFuseAES = false;
+ /// HasFuseLiterals - if true, processor executes back to back
+ /// bottom and top halves of literal generation faster.
+ bool HasFuseLiterals = false;
+
/// If true, if conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
@@ -616,8 +620,9 @@ public:
bool hasFullFP16() const { return HasFullFP16; }
bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
- bool hasFusion() const { return hasFuseAES(); }
+ bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/llvm/test/CodeGen/ARM/misched-fusion-lit.ll b/llvm/test/CodeGen/ARM/misched-fusion-lit.ll
new file mode 100644
index 00000000000..cfc4356e01a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/misched-fusion-lit.ll
@@ -0,0 +1,39 @@
+; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=-fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
+; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=+fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+
+@g = common global i32* zeroinitializer
+
+define i32* @litp(i32 %a, i32 %b) {
+entry:
+ %add = add nsw i32 %b, %a
+ %ptr = getelementptr i32, i32* bitcast (i32* (i32, i32)* @litp to i32*), i32 %add
+ %res = getelementptr i32, i32* bitcast (i32** @g to i32*), i32 %add
+ store i32* %ptr, i32** @g, align 4
+ ret i32* %res
+
+; CHECK-LABEL: litp:
+; CHECK: movw [[R:r[0-9]+]], :lower16:litp
+; CHECKDONT-NEXT: movw [[S:r[0-9]+]], :lower16:g
+; CHECKFUSE-NEXT: movt [[R]], :upper16:litp
+; CHECKFUSE-NEXT: movw [[S:r[0-9]+]], :lower16:g
+; CHECKFUSE-NEXT: movt [[S]], :upper16:g
+}
+
+define i32 @liti(i32 %a, i32 %b) {
+entry:
+ %adda = add i32 %a, -262095121
+ %add1 = add i32 %adda, %b
+ %addb = add i32 %b, 121110837
+ %add2 = add i32 %addb, %a
+ store i32 %add1, i32* bitcast (i32** @g to i32*), align 4
+ ret i32 %add2
+
+; CHECK-LABEL: liti:
+; CHECK: movw [[R:r[0-9]+]], #309
+; CHECKDONT-NEXT: add {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}
+; CHECKFUSE-NEXT: movt [[R]], #1848
+; CHECKFUSE: movw [[S:r[0-9]+]], :lower16:g
+; CHECKFUSE-NEXT: movt [[S]], :upper16:g
+; CHECKFUSE-NEXT: movw [[T:r[0-9]+]], #48879
+; CHECKFUSE-NEXT: movt [[T]], #61536
+}
OpenPOWER on IntegriCloud