summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AArch64
diff options
context:
space:
mode:
author Tim Northover <tnorthover@apple.com> 2017-12-18 10:36:00 +0000
committer Tim Northover <tnorthover@apple.com> 2017-12-18 10:36:00 +0000
commit 9097a07e4e83b9f394598d53557247277b064b09 (patch)
tree 33a9801ce85939366b6da2d17c31e19c12ba74c6 /llvm/lib/Target/AArch64
parent 7bd3fb15e11d4ad8f7b0df8085fb947093267d61 (diff)
downloadbcm5719-llvm-9097a07e4e83b9f394598d53557247277b064b09.tar.gz
bcm5719-llvm-9097a07e4e83b9f394598d53557247277b064b09.zip
AArch64: work around how Cyclone handles "movi.2d vD, #0".
For Cyclone, the instruction "movi.2d vD, #0" is executed incorrectly in some rare circumstances. Work around the issue conservatively by avoiding the instruction entirely. This patch changes CodeGen so that problematic instructions are never generated, and the AsmParser so that an equivalent instruction is used (with a warning). llvm-svn: 320965
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64.td 9
-rw-r--r-- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64Subtarget.h 5
-rw-r--r-- llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp 25
4 files changed, 39 insertions, 2 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 9cb4eafa099..75fb937de9b 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -61,6 +61,12 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
+/// ... but the floating-point version doesn't quite work in rare cases on older
+/// CPUs.
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
+ "HasZeroCycleZeroingFPWorkaround", "true",
+ "The zero-cycle floating-point zeroing instruction has a bug">;
+
def FeatureStrictAlign : SubtargetFeature<"strict-align",
"StrictAlign", "true",
"Disallow all unaligned memory "
@@ -290,7 +296,8 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeaturePerfMon,
FeatureSlowMisaligned128Store,
FeatureZCRegMove,
- FeatureZCZeroing
+ FeatureZCZeroing,
+ FeatureZCZeroingFPWorkaround
]>;
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 56fcff606aa..67138f41dda 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -523,7 +523,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
unsigned DestReg = MI.getOperand(0).getReg();
- if (STI->hasZeroCycleZeroing()) {
+ if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) {
// Convert H/S/D register to corresponding Q register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index a73ba887413..5d9759d363d 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -86,6 +86,7 @@ protected:
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing = false;
+ bool HasZeroCycleZeroingFPWorkaround = false;
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
@@ -197,6 +198,10 @@ public:
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool hasZeroCycleZeroingFPWorkaround() const {
+ return HasZeroCycleZeroingFPWorkaround;
+ }
+
bool requiresStrictAlign() const { return StrictAlign; }
bool isXRaySupported() const override { return true; }
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 2763a5b3a90..fd2ef18fbe0 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3796,6 +3796,31 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
}
+
+ // The Cyclone CPU and early successors didn't execute the zero-cycle zeroing
+ // instruction for FP registers correctly in some rare circumstances. Convert
+ // it to a safe instruction and warn (because silently changing someone's
+ // assembly is rude).
+ if (getSTI().getFeatureBits()[AArch64::FeatureZCZeroingFPWorkaround] &&
+ NumOperands == 4 && Tok == "movi") {
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ if ((Op1.isToken() && Op2.isNeonVectorReg() && Op3.isImm()) ||
+ (Op1.isNeonVectorReg() && Op2.isToken() && Op3.isImm())) {
+ StringRef Suffix = Op1.isToken() ? Op1.getToken() : Op2.getToken();
+ if (Suffix.lower() == ".2d" &&
+ cast<MCConstantExpr>(Op3.getImm())->getValue() == 0) {
+ Warning(IDLoc, "instruction movi.2d with immediate #0 may not function"
+ " correctly on this CPU, converting to equivalent movi.16b");
+ // Switch the suffix to .16b.
+ unsigned Idx = Op1.isToken() ? 1 : 2;
+ Operands[Idx] = AArch64Operand::CreateToken(".16b", false, IDLoc,
+ getContext());
+ }
+ }
+ }
+
// FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands.
// InstAlias can't quite handle this since the reg classes aren't
// subclasses.
OpenPOWER on IntegriCloud