author     Craig Topper <craig.topper@intel.com>    2017-11-01 21:00:59 +0000
committer  Craig Topper <craig.topper@intel.com>    2017-11-01 21:00:59 +0000
commit     4e56ba271e808f5d93184440ee9345133697d749 (patch)
tree       c45701d658cfc8735ae35286d9b1775fba51d26d
parent     98c6549e4acc8f42959a393aae9ba63d32f10f4b (diff)
[X86] Add custom code to EVEX to VEX pass to turn unmasked 128-bit VPALIGND/Q into VPALIGNR if the extended registers aren't being used.
This will enable us to prefer VALIGND/Q during shuffle lowering in order to get the extended register encoding space when BWI isn't available. But if we end up not using the extended registers, we can switch to VPALIGNR for the shorter VEX encoding.

Differential Revision: https://reviews.llvm.org/D39401

llvm-svn: 317122
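For illustration, the following is a minimal standalone sketch of the immediate rescaling that the new performCustomAdjustments hook performs: VALIGND/VALIGNQ take an element-granular rotate count, while VPALIGNR takes a byte-granular one, so the immediate must be multiplied by the element size (4 for VALIGND, 8 for VALIGNQ) when converting. The helper name and the plain-integer interface here are hypothetical; the actual patch operates on the MachineInstr's immediate operand, as shown in the diff below.

#include <cassert>
#include <cstdio>

// Hypothetical helper (not part of the patch): converts an element-granular
// VALIGND/VALIGNQ rotate immediate into the byte-granular immediate that
// VPALIGNR expects, by scaling with the element size in bytes.
static unsigned valignToPalignrImm(unsigned ElemImm, unsigned ElemSizeBytes) {
  assert((ElemSizeBytes == 4 || ElemSizeBytes == 8) &&
         "VALIGND rotates dwords (4 bytes), VALIGNQ rotates qwords (8 bytes)");
  return ElemImm * ElemSizeBytes;
}

int main() {
  // Matches the updated CHECK lines in the test diff below:
  //   valignd $2 (dword elements) -> vpalignr $8  (2 * 4 bytes)
  //   valignq $1 (qword elements) -> vpalignr $8  (1 * 8 bytes)
  std::printf("valignd $2 -> vpalignr $%u\n", valignToPalignrImm(2, 4));
  std::printf("valignq $1 -> vpalignr $%u\n", valignToPalignrImm(1, 8));
  return 0;
}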
-rw-r--r--  llvm/lib/Target/X86/X86EvexToVex.cpp                  | 21
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll  |  8
-rw-r--r--  llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp      |  8
3 files changed, 32 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp
index 440efa3a2b7..744510a3a3b 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -163,6 +163,25 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
   return false;
 }
 
+// Do any custom cleanup needed to finalize the conversion.
+static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
+  (void)NewOpc;
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case X86::VALIGNDZ128rri:
+  case X86::VALIGNDZ128rmi:
+  case X86::VALIGNQZ128rri:
+  case X86::VALIGNQZ128rmi:
+    assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
+           "Unexpected new opcode!");
+    unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
+                      Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
+    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+    Imm.setImm(Imm.getImm() * Scale);
+    break;
+  }
+}
+
 // For EVEX instructions that can be encoded using VEX encoding
 // replace them by the VEX encoding in order to reduce size.
@@ -223,6 +242,8 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
   if (usesExtendedRegister(MI))
     return false;
 
+  performCustomAdjustments(MI, NewOpc);
+
   MI.setDesc(TII->get(NewOpc));
   MI.setAsmPrinterFlag(AC_EVEX_2_VEX);
   return true;
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index 992c4a10887..b6723ee50b0 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -4689,8 +4689,8 @@ declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32,
define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: valignd $2, %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf3,0x7d,0x08,0x03,0xd9,0x02]
-; CHECK-NEXT: ## xmm3 = xmm1[2,3],xmm0[0,1]
+; CHECK-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
+; CHECK-NEXT: ## xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
; CHECK-NEXT: ## xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
@@ -4730,8 +4730,8 @@ declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32,
define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: valignq $1, %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf3,0xfd,0x08,0x03,0xd9,0x01]
-; CHECK-NEXT: ## xmm3 = xmm1[1],xmm0[0]
+; CHECK-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
+; CHECK-NEXT: ## xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
; CHECK-NEXT: ## xmm2 {%k1} = xmm1[1],xmm0[0]
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 7c2d4efa29b..848e59c0790 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -148,7 +148,13 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
{"VINSERTF64x2Z256rm", "VINSERTF128rm", false},
{"VINSERTF64x2Z256rr", "VINSERTF128rr", false},
{"VINSERTI64x2Z256rm", "VINSERTI128rm", false},
- {"VINSERTI64x2Z256rr", "VINSERTI128rr", false}
+ {"VINSERTI64x2Z256rr", "VINSERTI128rr", false},
+
+ // These will require some custom adjustment in the conversion pass.
+ {"VALIGNDZ128rri", "VPALIGNRrri", true},
+ {"VALIGNQZ128rri", "VPALIGNRrri", true},
+ {"VALIGNDZ128rmi", "VPALIGNRrmi", true},
+ {"VALIGNQZ128rmi", "VPALIGNRrmi", true},
};
// Print the manually added entries