author     Simon Pilgrim <llvm-dev@redking.me.uk>    2018-03-01 22:22:31 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2018-03-01 22:22:31 +0000
commit     90fd0622b63d2c7addb8b97d98e134ea63d3f037
tree       02450a7498214fb83d32a755a80ff54b3f98a58a
parent     05ce1d3944394be8bba20d50c8da77ff3a1b657e
[X86][MMX] Improve handling of 64-bit MMX constants
64-bit MMX constant generation usually ends up lowering into SSE instructions before being spilled/reloaded as an MMX type. This patch bitcasts the constant to a double value to allow correct loading directly to the MMX register.

I've added MMX constant asm comment support to improve testing; it's better to always print the double values as hex constants, as MMX is mainly an integer unit (and even with 3DNow! it's just floats).

Differential Revision: https://reviews.llvm.org/D43616

llvm-svn: 326497
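As a quick illustration of the approach, the following is a minimal standalone sketch of the combine's constant path, using only LLVM's APInt/APFloat support classes as the patch does (it links against LLVMSupport of this commit's vintage). The main() harness and the sample constant are illustrative assumptions, not part of the patch:

    // Sketch of the constant path in combineBitcast for MVT::x86mmx.
    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      // Sample 64-bit MMX constant: bits 32-63 hold 2, bits 0-31 hold 0.
      APInt Bits(64, 0x200000000ULL);

      if (Bits.countLeadingZeros() >= 32) {
        // Upper half is zero: a 32-bit immediate plus MOVD suffices.
        outs() << "movd imm32 = " << Bits.trunc(32) << "\n";
      } else {
        // Otherwise reinterpret the raw bits as an IEEE double so the
        // constant can be loaded straight into the MMX register.
        APFloat F64(APFloat::IEEEdouble(), Bits);
        outs() << "f64 bits = 0x"
               << F64.bitcastToAPInt().toString(16, /*Signed=*/false) << "\n";
      }
      return 0;
    }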
 llvm/lib/Target/X86/X86ISelLowering.cpp     | 17
 llvm/lib/Target/X86/X86MCInstLower.cpp      | 18
 llvm/test/CodeGen/X86/fast-isel-bc.ll       | 17
 llvm/test/CodeGen/X86/vector-shuffle-mmx.ll | 11
 4 files changed, 33 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8921bd44b34..56c6bd9d268 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30833,14 +30833,19 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
if (VT == MVT::x86mmx) {
- // Detect zero-extended MMX constant vectors.
+ // Detect MMX constant vectors.
APInt UndefElts;
- SmallVector<APInt, 2> EltBits;
- if (getTargetConstantBitsFromNode(N0, 32, UndefElts, EltBits) &&
-      EltBits[1] == 0) {
+ SmallVector<APInt, 1> EltBits;
+ if (getTargetConstantBitsFromNode(N0, 64, UndefElts, EltBits)) {
SDLoc DL(N0);
- return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,
- DAG.getConstant(EltBits[0], DL, MVT::i32));
+ // Handle zero-extension of i32 with MOVD.
+ if (EltBits[0].countLeadingZeros() >= 32)
+ return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,
+ DAG.getConstant(EltBits[0].trunc(32), DL, MVT::i32));
+ // Else, bitcast to a double.
+ // TODO - investigate supporting sext 32-bit immediates on x86_64.
+ APFloat F64(APFloat::IEEEdouble(), EltBits[0]);
+ return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64));
}
// Detect bitcasts to x86mmx low word.
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 730ba745eb7..f5f87c1fdab 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1822,6 +1822,24 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
break;
}
+ case X86::MMX_MOVQ64rm: {
+ if (!OutStreamer->isVerboseAsm())
+ break;
+ if (MI->getNumOperands() <= 4)
+ break;
+ if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ const MachineOperand &DstOp = MI->getOperand(0);
+ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+ if (auto *CF = dyn_cast<ConstantFP>(C)) {
+ CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
+ OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ }
+ }
+ break;
+ }
+
#define MOV_CASE(Prefix, Suffix) \
case X86::Prefix##MOVAPD##Suffix##rm: \
case X86::Prefix##MOVAPS##Suffix##rm: \
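To see what the new comment encodes without building LLVM, here is a plain C++ sketch (an illustrative assumption, not LLVM code) that round-trips the same bit pattern: the lowering stores the integer bits as an f64 constant pool entry, and the printer recovers those bits to emit the hex comment:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint64_t MMXBits = 0x200000000ULL;      // sample constant from the tests

      double D;
      std::memcpy(&D, &MMXBits, sizeof(D));   // u64 -> f64 (as combineBitcast does)

      uint64_t Printed;
      std::memcpy(&Printed, &D, sizeof(D));   // f64 -> u64 (as the asm printer does)
      std::printf("mm0 = 0x%llx\n", (unsigned long long)Printed);
      return 0;
    }

Round-tripping through memcpy preserves every bit, which is why printing the raw hex is safe even for bit patterns that are not meaningful doubles.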
diff --git a/llvm/test/CodeGen/X86/fast-isel-bc.ll b/llvm/test/CodeGen/X86/fast-isel-bc.ll
index 3bc84c88ad8..3287f992cd0 100644
--- a/llvm/test/CodeGen/X86/fast-isel-bc.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-bc.ll
@@ -7,19 +7,12 @@
declare void @func2(x86_mmx)
; This isn't spectacular, but it's MMX code at -O0...
-; For now, handling of x86_mmx parameters in fast Isel is unimplemented,
-; so we get pretty poor code. The below is preferable.
-; CHEK: movl $2, %eax
-; CHEK: movd %rax, %mm0
-; CHEK: movd %mm0, %rdi
define void @func1() nounwind {
; X86-LABEL: func1:
; X86: ## %bb.0:
; X86-NEXT: subl $12, %esp
-; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT: movsd %xmm0, (%esp)
-; X86-NEXT: movq (%esp), %mm0
+; X86-NEXT: movq LCPI0_0, %mm0 ## mm0 = 0x200000000
; X86-NEXT: calll _func2
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
@@ -27,13 +20,7 @@ define void @func1() nounwind {
; X64-LABEL: func1:
; X64: ## %bb.0:
; X64-NEXT: pushq %rax
-; X64-NEXT: movl $2, %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movq %rcx, %xmm0
-; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: movq %xmm0, (%rsp)
-; X64-NEXT: movq (%rsp), %mm0
+; X64-NEXT: movq {{.*}}(%rip), %mm0 ## mm0 = 0x200000000
; X64-NEXT: movq2dq %mm0, %xmm0
; X64-NEXT: callq _func2
; X64-NEXT: popq %rax
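The printed constant decodes little-endian per 32-bit lane; a tiny hypothetical decoder (not part of the patch, and assuming the test's source constant is a <2 x i32> vector) shows the mapping back from the hex comment:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t C = 0x200000000ULL;            // the comment printed above
      uint32_t Lane0 = (uint32_t)C;           // low 32 bits  -> element 0 = 0
      uint32_t Lane1 = (uint32_t)(C >> 32);   // high 32 bits -> element 1 = 2
      std::printf("<2 x i32> <i32 %u, i32 %u>\n", Lane0, Lane1);
      return 0;
    }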
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
index a2f6ecc25cc..a00df1456b7 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -33,25 +33,18 @@ define void @test1() {
; X32: ## %bb.0: ## %entry
; X32-NEXT: pushl %edi
; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: subl $8, %esp
-; X32-NEXT: .cfi_def_cfa_offset 16
; X32-NEXT: .cfi_offset %edi, -8
; X32-NEXT: pxor %mm0, %mm0
-; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: movsd %xmm0, (%esp)
-; X32-NEXT: movq (%esp), %mm1
+; X32-NEXT: movq LCPI1_0, %mm1 ## mm1 = 0x7070606040400000
; X32-NEXT: xorl %edi, %edi
; X32-NEXT: maskmovq %mm1, %mm0
-; X32-NEXT: addl $8, %esp
; X32-NEXT: popl %edi
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: ## %bb.0: ## %entry
; X64-NEXT: pxor %mm0, %mm0
-; X64-NEXT: movq {{.*}}(%rip), %rax
-; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm1
+; X64-NEXT: movq {{.*}}(%rip), %mm1 ## mm1 = 0x7070606040400000
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: maskmovq %mm1, %mm0
; X64-NEXT: retq