author     Craig Topper <craig.topper@intel.com>    2017-11-01 21:00:59 +0000
committer  Craig Topper <craig.topper@intel.com>    2017-11-01 21:00:59 +0000
commit     4e56ba271e808f5d93184440ee9345133697d749 (patch)
tree       c45701d658cfc8735ae35286d9b1775fba51d26d
parent     98c6549e4acc8f42959a393aae9ba63d32f10f4b (diff)
[X86] Add custom code to EVEX to VEX pass to turn unmasked 128-bit VPALIGND/Q into VPALIGNR if the extended registers aren't being used.
This will enable us to prefer VALIGND/Q during shuffle lowering in order to get the extended register encoding space when BWI isn't available. But if we end up not using the extended registers, we can switch to VPALIGNR for the shorter VEX encoding.

Differential Revision: https://reviews.llvm.org/D39401

llvm-svn: 317122
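For illustration, the following is a minimal standalone sketch of the immediate rescaling that the new performCustomAdjustments hook performs: VALIGND/VALIGNQ take an element-granular rotate count, while VPALIGNR takes a byte-granular one, so the immediate must be multiplied by the element size (4 for VALIGND, 8 for VALIGNQ) when converting. The helper name and the plain-integer interface here are hypothetical; the actual patch operates on the MachineInstr's immediate operand, as shown in the diff below.

#include <cassert>
#include <cstdio>

// Hypothetical helper (not part of the patch): converts an element-granular
// VALIGND/VALIGNQ rotate immediate into the byte-granular immediate that
// VPALIGNR expects, by scaling with the element size in bytes.
static unsigned valignToPalignrImm(unsigned ElemImm, unsigned ElemSizeBytes) {
  assert((ElemSizeBytes == 4 || ElemSizeBytes == 8) &&
         "VALIGND rotates dwords (4 bytes), VALIGNQ rotates qwords (8 bytes)");
  return ElemImm * ElemSizeBytes;
}

int main() {
  // Matches the updated CHECK lines in the test diff below:
  //   valignd $2 (dword elements) -> vpalignr $8  (2 * 4 bytes)
  //   valignq $1 (qword elements) -> vpalignr $8  (1 * 8 bytes)
  std::printf("valignd $2 -> vpalignr $%u\n", valignToPalignrImm(2, 4));
  std::printf("valignq $1 -> vpalignr $%u\n", valignToPalignrImm(1, 8));
  return 0;
}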
-rw-r--r--  llvm/lib/Target/X86/X86EvexToVex.cpp                  | 21
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll  |  8
-rw-r--r--  llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp      |  8
3 files changed, 32 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp
index 440efa3a2b7..744510a3a3b 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -163,6 +163,25 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
   return false;
 }
 
+// Do any custom cleanup needed to finalize the conversion.
+static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
+  (void)NewOpc;
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case X86::VALIGNDZ128rri:
+  case X86::VALIGNDZ128rmi:
+  case X86::VALIGNQZ128rri:
+  case X86::VALIGNQZ128rmi:
+    assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
+           "Unexpected new opcode!");
+    unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
+                      Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
+    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+    Imm.setImm(Imm.getImm() * Scale);
+    break;
+  }
+}
+
 // For EVEX instructions that can be encoded using VEX encoding
 // replace them by the VEX encoding in order to reduce size.
@@ -223,6 +242,8 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
   if (usesExtendedRegister(MI))
     return false;
 
+  performCustomAdjustments(MI, NewOpc);
+
   MI.setDesc(TII->get(NewOpc));
   MI.setAsmPrinterFlag(AC_EVEX_2_VEX);
   return true;
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index 992c4a10887..b6723ee50b0 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -4689,8 +4689,8 @@ declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32,
define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: valignd $2, %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf3,0x7d,0x08,0x03,0xd9,0x02]
-; CHECK-NEXT: ## xmm3 = xmm1[2,3],xmm0[0,1]
+; CHECK-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
+; CHECK-NEXT: ## xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
; CHECK-NEXT: ## xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
@@ -4730,8 +4730,8 @@ declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32,
define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: valignq $1, %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf3,0xfd,0x08,0x03,0xd9,0x01]
-; CHECK-NEXT: ## xmm3 = xmm1[1],xmm0[0]
+; CHECK-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
+; CHECK-NEXT: ## xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
; CHECK-NEXT: ## xmm2 {%k1} = xmm1[1],xmm0[0]
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 7c2d4efa29b..848e59c0790 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -148,7 +148,13 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
{"VINSERTF64x2Z256rm", "VINSERTF128rm", false},
{"VINSERTF64x2Z256rr", "VINSERTF128rr", false},
{"VINSERTI64x2Z256rm", "VINSERTI128rm", false},
- {"VINSERTI64x2Z256rr", "VINSERTI128rr", false}
+ {"VINSERTI64x2Z256rr", "VINSERTI128rr", false},
+
+ // These will require some custom adjustment in the conversion pass.
+ {"VALIGNDZ128rri", "VPALIGNRrri", true},
+ {"VALIGNQZ128rri", "VPALIGNRrri", true},
+ {"VALIGNDZ128rmi", "VPALIGNRrmi", true},
+ {"VALIGNQZ128rmi", "VPALIGNRrmi", true},
};
// Print the manually added entries