| author | Florian Hahn <florian.hahn@arm.com> | 2017-07-29 20:35:28 +0000 |
| --- | --- | --- |
| committer | Florian Hahn <florian.hahn@arm.com> | 2017-07-29 20:35:28 +0000 |
| commit | f63a5e91dbedd7d1388e08b5f491d90f8f2c1a0f (patch) | |
| tree | 0e50cc590fde608df14c9739358040b80a420786 /llvm/lib/Target | |
| parent | 2f86e3d4948853f7b753e7ae4b9c9eac8f6e264f (diff) | |
| download | bcm5719-llvm-f63a5e91dbedd7d1388e08b5f491d90f8f2c1a0f.tar.gz bcm5719-llvm-f63a5e91dbedd7d1388e08b5f491d90f8f2c1a0f.zip | |
[AArch64] Tie source and destination operands for AESMC/AESIMC.
Summary:
Most CPUs implementing AES fusion require instruction pairs of the form
AESE Vn, _
AESMC Vn, Vn
and
AESD Vn, _
AESIMC Vn, Vn
The constraint is added to AES(I)MC instructions that use the result of
an AES(E|D) instruction, by selecting the new AES(I)MCrrTied pseudo
instructions, which constrain the source and destination registers to be
the same.
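For illustration, the new patterns apply when the result of the aese/aesd intrinsic feeds directly into aesmc/aesimc. A minimal sketch in LLVM IR, assuming the standard AArch64 crypto intrinsic signatures (the function name is made up):

```llvm
; One AES round: aese feeding aesmc. With the fuse-aes subtarget feature
; enabled, this chain should be selected to AESErr followed by AESMCrrTied,
; so both instructions end up using the same register.
define <16 x i8> @aes_round(<16 x i8> %state, <16 x i8> %key) {
  %e = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %state, <16 x i8> %key)
  %m = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e)
  ret <16 x i8> %m
}

declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8>)
```

Without HasFuseAES the generic AESMCrr/AESIMCrr selection is unchanged, since the tied pseudos are only produced by the guarded patterns.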
A nice side effect of this change is that now all possible pairs are
scheduled back-to-back on the exynos-m1 for the misched-fusion-aes.ll
test case.
I also had to update the aes_load_store test. The version I added initially
was heavily reduced, and with the new constraint AESE/AESMC could no longer
be scheduled back-to-back. The updated test is more realistic while still
exposing the same scheduling problem as the initial test case.
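This is not the actual aes_load_store test, just a rough sketch of the kind of shape that exposes the problem: several independent aese/aesmc chains give the scheduler real freedom to split the pairs unless the tied-operand constraint and the macro-fusion hook keep them adjacent. Function and value names below are illustrative only:

```llvm
; Two independent blocks encrypted with the same round key. A scheduler that
; ignores fusion could interleave the loads and the aese/aesmc instructions;
; the fusion hook should keep each aese/aesmc pair back-to-back.
define void @two_blocks(<16 x i8>* %p0, <16 x i8>* %p1, <16 x i8> %key) {
  %b0 = load <16 x i8>, <16 x i8>* %p0
  %b1 = load <16 x i8>, <16 x i8>* %p1
  %e0 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %b0, <16 x i8> %key)
  %m0 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e0)
  %e1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %b1, <16 x i8> %key)
  %m1 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e1)
  store <16 x i8> %m0, <16 x i8>* %p0
  store <16 x i8> %m1, <16 x i8>* %p1
  ret void
}

declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8>)
```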
Reviewers: t.p.northover, rengolin, evandro, kristof.beyls, silviu.baranga
Reviewed By: t.p.northover, evandro
Subscribers: aemerson, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D35299
llvm-svn: 309495
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 12 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 28 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64MacroFusion.cpp | 4 |
3 files changed, 43 insertions, 1 deletion
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 454302570e9..fdb90f40aa8 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -966,6 +966,18 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case AArch64::CMP_SWAP_128:
     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
 
+  case AArch64::AESMCrrTied:
+  case AArch64::AESIMCrrTied: {
+    MachineInstrBuilder MIB =
+    BuildMI(MBB, MBBI, MI.getDebugLoc(),
+            TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
+                                                      AArch64::AESIMCrr))
+      .add(MI.getOperand(0))
+      .add(MI.getOperand(1));
+    transferImpOps(MI, MIB, MIB);
+    MI.eraseFromParent();
+    return true;
+  }
   }
   return false;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0dcf07f9841..5049a39814f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -37,6 +37,9 @@
 def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                        AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
 def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                        AssemblerPredicate<"FeatureSPE", "spe">;
+def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
+                       AssemblerPredicate<"FeatureFuseAES",
+                                          "fuse-aes">;
 def HasSVE           : Predicate<"Subtarget->hasSVE()">,
                        AssemblerPredicate<"FeatureSVE", "sve">;
@@ -5304,6 +5307,31 @@
 def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
 def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
 def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
+
+// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
+// for AES fusion on some CPUs.
+let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
+def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
+                 Sched<[WriteV]>;
+def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
+                  Sched<[WriteV]>;
+}
+
+// Only use constrained versions of AES(I)MC instructions if they are paired with
+// AESE/AESD.
+def : Pat<(v16i8 (int_aarch64_crypto_aesmc
+            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
+                                            (v16i8 V128:$src2))))),
+          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
+                                             (v16i8 V128:$src2)))))>,
+          Requires<[HasFuseAES]>;
+
+def : Pat<(v16i8 (int_aarch64_crypto_aesimc
+            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
+                                            (v16i8 V128:$src2))))),
+          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
+                                              (v16i8 V128:$src2)))))>,
+          Requires<[HasFuseAES]>;
+
 def SHA1Crrr  : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>;
 def SHA1Prrr  : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>;
 def SHA1Mrrr  : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>;
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index ccc9d2ad1b4..963cfadc54f 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -118,11 +118,13 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   // Fuse AES crypto operations.
   switch(SecondOpcode) {
   // AES encode.
-  case AArch64::AESMCrr :
+  case AArch64::AESMCrr:
+  case AArch64::AESMCrrTied:
     return FirstOpcode == AArch64::AESErr ||
            FirstOpcode == AArch64::INSTRUCTION_LIST_END;
   // AES decode.
   case AArch64::AESIMCrr:
+  case AArch64::AESIMCrrTied:
     return FirstOpcode == AArch64::AESDrr ||
            FirstOpcode == AArch64::INSTRUCTION_LIST_END;
   }
```