summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC/PPCInstrInfo.td
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2013-12-12 00:19:11 +0000
committerHal Finkel <hfinkel@anl.gov>2013-12-12 00:19:11 +0000
commitceb1f12d9a5a6bb9135796cfaccc84baf23a1a0f (patch)
treea6399cfc44b6dc26a33dc9e0437772d7b39faaf4 /llvm/lib/Target/PowerPC/PPCInstrInfo.td
parent03071ab74c4f9efec0dab003eec5ec9c604a868c (diff)
downloadbcm5719-llvm-ceb1f12d9a5a6bb9135796cfaccc84baf23a1a0f.tar.gz
bcm5719-llvm-ceb1f12d9a5a6bb9135796cfaccc84baf23a1a0f.zip
Improve instruction scheduling for the PPC POWER7
Aside from a few minor latency corrections, the major change here is a new hazard recognizer which focuses on better dispatch-group formation on the POWER7. As with the PPC970's hazard recognizer, the most important thing it does is avoid load-after-store hazards within the same dispatch group. It uses the POWER7's special dispatch-group-terminating nop instruction (instead of inserting multiple regular nop instructions). This new hazard recognizer makes use of the scheduling dependency graph itself, built using AA information, to robustly detect the possibility of load-after-store hazards. significant test-suite performance changes (the error bars are 99.5% confidence intervals based on 5 test-suite runs both with and without the change -- speedups are negative): speedups: MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2 -0.55171% +/- 0.333168% MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CrossingThresholds-dbl -17.5576% +/- 14.598% MultiSource/Benchmarks/TSVC/Reductions-dbl/Reductions-dbl -29.5708% +/- 7.09058% MultiSource/Benchmarks/TSVC/Reductions-flt/Reductions-flt -34.9471% +/- 11.4391% SingleSource/Benchmarks/BenchmarkGame/puzzle -25.1347% +/- 11.0104% SingleSource/Benchmarks/Misc/flops-8 -17.7297% +/- 9.79061% SingleSource/Benchmarks/Shootout-C++/ary3 -35.5018% +/- 23.9458% SingleSource/Regression/C/uint64_to_float -56.3165% +/- 25.4234% SingleSource/UnitTests/Vectorizer/gcc-loops -18.5309% +/- 6.8496% regressions: MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000 18.351% +/- 12.156% SingleSource/Benchmarks/Shootout-C++/methcall 27.3086% +/- 14.4733% llvm-svn: 197099
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCInstrInfo.td')
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td9
1 files changed, 9 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 8754b40e09f..11c296dd336 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1616,8 +1616,17 @@ def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
+
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple,
[]>;
+let isCodeGenOnly = 1 in {
+// The POWER6 and POWER7 have special group-terminating nops.
+def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins),
+ "ori 1, 1, 0", IIC_IntSimple, []>;
+def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins),
+ "ori 2, 2, 0", IIC_IntSimple, []>;
+}
+
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
"cmpwi $crD, $rA, $imm", IIC_IntCompare>;
OpenPOWER on IntegriCloud