[Power9] Part-word VSX integer scalar loads/stores and sign extend instructions

This patch corresponds to review D23155 (https://reviews.llvm.org/D23155). It removes the VSHRC register class (based on D20310) and adds exploitation of the Power9 sub-word integer loads into VSX registers, as well as vector sign extensions. The new instructions are useful for a few purposes: (1) int-to-FP conversions of 1- or 2-byte values loaded from memory; (2) building vectors of 1- or 2-byte integers with values loaded from memory; (3) storing individual 1- or 2-byte elements from integer vectors. This patch implements all of those uses. llvm-svn: 283190
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2016-10-04 06:59:23 +0000
committer: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2016-10-04 06:59:23 +0000
commit: 11049f8f07330c30c689d54ed91d63909b360cf2 (patch)
tree: a70b349635cb8c9c5d445cc77595f2794e5e0dc4 /llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
parent: 6b929d5ba91eec2acd6f47c6921ce0dff9a94cd6 (diff)
download: bcm5719-llvm-11049f8f07330c30c689d54ed91d63909b360cf2.tar.gz
bcm5719-llvm-11049f8f07330c30c689d54ed91d63909b360cf2.zip
1 files changed, 57 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index a57a83d7aa9..3360e74db99 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -170,11 +170,68 @@ bool PPCMIPeephole::simplifyCode(void) {
                 ToErase = &MI;
                 Simplified = true;
               }
+            } else if ((Immed == 0 || Immed == 3) &&
+                       DefMI && DefMI->getOpcode() == PPC::XXPERMDIs) {
+              // Splat fed by another splat - switch the output of the first
+              // and remove the second.
+              DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+              ToErase = &MI;
+              Simplified = true;
+              DEBUG(dbgs() << "Removing redundant splat: ");
+              DEBUG(MI.dump());
             }
           }
         }
         break;
       }
+      case PPC::VSPLTB:
+      case PPC::VSPLTH:
+      case PPC::XXSPLTW: {
+        unsigned MyOpcode = MI.getOpcode();
+        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
+        unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
+        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
+        if (!DefMI)
+          break;
+        unsigned DefOpcode = DefMI->getOpcode();
+        bool SameOpcode = (MyOpcode == DefOpcode) ||
+          (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
+          (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
+          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs);
+        // Splat fed by another splat - switch the output of the first
+        // and remove the second.
+        if (SameOpcode) {
+          DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+          ToErase = &MI;
+          Simplified = true;
+          DEBUG(dbgs() << "Removing redundant splat: ");
+          DEBUG(MI.dump());
+        }
+        // Splat fed by a shift. Usually when we align value to splat into
+        // vector element zero.
+        if (DefOpcode == PPC::XXSLDWI) {
+          unsigned ShiftRes = DefMI->getOperand(0).getReg();
+          unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
+          unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
+          unsigned ShiftImm = DefMI->getOperand(3).getImm();
+          unsigned SplatImm = MI.getOperand(2).getImm();
+          if (ShiftOp1 == ShiftOp2) {
+            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
+            if (MRI->hasOneNonDBGUse(ShiftRes)) {
+              DEBUG(dbgs() << "Removing redundant shift: ");
+              DEBUG(DefMI->dump());
+              ToErase = DefMI;
+            }
+            Simplified = true;
+            DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
+                  " to " << NewElem << " in instruction: ");
+            DEBUG(MI.dump());
+            MI.getOperand(1).setReg(ShiftOp1);
+            MI.getOperand(2).setImm(NewElem);
+          }
+        }
+        break;
+      }
       }
     }
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2016-10-04 06:59:23 +0000
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2016-10-04 06:59:23 +0000
commit	11049f8f07330c30c689d54ed91d63909b360cf2 (patch)
tree	a70b349635cb8c9c5d445cc77595f2794e5e0dc4 /llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
parent	6b929d5ba91eec2acd6f47c6921ce0dff9a94cd6 (diff)
download	bcm5719-llvm-11049f8f07330c30c689d54ed91d63909b360cf2.tar.gz bcm5719-llvm-11049f8f07330c30c689d54ed91d63909b360cf2.zip