| author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-09-14 02:36:58 +0000 | 
|---|---|---|
| committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-09-14 02:36:58 +0000 | 
| commit | d560b8c8e9ff9b48bd06e85ac0f3bc3de17107be (patch) | |
| tree | 238fd45a45a75d080e8c613fb87c7d0edb3c3e66 /llvm | |
| parent | 333a59eced18579363b7d11a04f97bca6553edc7 (diff) | |
Teach the foldable tables about 128-bit AVX instructions and make the
alignment check for 256-bit classes stricter. There are no testcases,
but we catch more folding cases for AVX when running the single- and
multi-source programs in the LLVM test-suite.

Since some 128-bit AVX instructions have a different number of operands
than their SSE counterparts, they are placed in different tables.

256-bit AVX instructions should also be added to the tables soon, and
there are a few more 128-bit versions left to handle, which should come
in the following commits.
llvm-svn: 139687
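
The fold tables this commit extends live in X86InstrInfo.cpp (see the diff below). As a rough standalone sketch of the mechanism — not LLVM's actual data structures; the opcode numbers and the `foldToStore` helper here are invented for illustration — each table entry pairs a register-form opcode with its memory-form twin plus flags such as `TB_FOLDED_STORE` and `TB_ALIGN_16`/`TB_ALIGN_32`, and a fold is rejected when the memory operand cannot satisfy the required alignment:

```cpp
// Standalone sketch, not LLVM's API: illustrates the idea behind the
// OpTbl* fold tables. Opcode numbers and the helper are invented.
#include <cstdint>
#include <iostream>
#include <optional>

enum : uint16_t {
  TB_FOLDED_LOAD  = 1 << 0,  // operand can be folded as a load
  TB_FOLDED_STORE = 1 << 1,  // operand can be folded as a store
  TB_NO_REVERSE   = 1 << 2,  // cannot be unfolded back to the reg form
  TB_ALIGN_16     = 1 << 3,  // memory operand must be 16-byte aligned
  TB_ALIGN_32     = 1 << 4,  // memory operand must be 32-byte aligned
};

struct FoldEntry {
  unsigned RegOp;   // register-to-register form
  unsigned MemOp;   // equivalent form with a memory operand
  uint16_t Flags;
};

// Hypothetical opcode numbers standing in for X86::VMOVAPSrr and friends:
// 128-bit AVX stores demand 16-byte alignment, 256-bit ones 32-byte.
enum : unsigned { VMOVAPSrr = 1, VMOVAPSmr, VMOVAPSYrr, VMOVAPSYmr };

static const FoldEntry FoldTable[] = {
  { VMOVAPSrr,  VMOVAPSmr,  TB_FOLDED_STORE | TB_ALIGN_16 },
  { VMOVAPSYrr, VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
};

// Return the memory-form opcode if folding is legal for the given
// alignment of the spill slot, mirroring the TB_ALIGN_* checks.
std::optional<unsigned> foldToStore(unsigned RegOp, unsigned AlignBytes) {
  for (const FoldEntry &E : FoldTable) {
    if (E.RegOp != RegOp || !(E.Flags & TB_FOLDED_STORE))
      continue;
    unsigned Needed = (E.Flags & TB_ALIGN_32) ? 32
                    : (E.Flags & TB_ALIGN_16) ? 16 : 1;
    if (AlignBytes < Needed)
      return std::nullopt;  // alignment too weak, keep the register form
    return E.MemOp;
  }
  return std::nullopt;      // opcode not in the table
}

int main() {
  // A 256-bit fold needs a 32-byte-aligned slot; 16 bytes is not enough.
  std::cout << foldToStore(VMOVAPSYrr, 16).has_value() << '\n'   // 0
            << foldToStore(VMOVAPSYrr, 32).value() << '\n';      // 4 (VMOVAPSYmr)
}
```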
Diffstat (limited to 'llvm')
| | File | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 305 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 39 |
2 files changed, 294 insertions, 50 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 72c825ca0ba..d514cfa0d92 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -294,8 +294,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::EXTRACTPSrr, X86::EXTRACTPSmr,   TB_FOLDED_STORE | TB_ALIGN_16 },
     { X86::FsMOVAPDrr,  X86::MOVSDmr,       TB_FOLDED_STORE | TB_NO_REVERSE },
     { X86::FsMOVAPSrr,  X86::MOVSSmr,       TB_FOLDED_STORE | TB_NO_REVERSE },
-    { X86::FsVMOVAPDrr, X86::VMOVSDmr,      TB_FOLDED_STORE | TB_NO_REVERSE },
-    { X86::FsVMOVAPSrr, X86::VMOVSSmr,      TB_FOLDED_STORE | TB_NO_REVERSE },
     { X86::IDIV16r,     X86::IDIV16m,       TB_FOLDED_LOAD },
     { X86::IDIV32r,     X86::IDIV32m,       TB_FOLDED_LOAD },
     { X86::IDIV64r,     X86::IDIV64m,       TB_FOLDED_LOAD },
@@ -318,17 +316,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOVAPDrr,    X86::MOVAPDmr,      TB_FOLDED_STORE | TB_ALIGN_16 },
     { X86::MOVAPSrr,    X86::MOVAPSmr,      TB_FOLDED_STORE | TB_ALIGN_16 },
     { X86::MOVDQArr,    X86::MOVDQAmr,      TB_FOLDED_STORE | TB_ALIGN_16 },
-    { X86::VMOVAPDYrr,  X86::VMOVAPDYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
-    { X86::VMOVAPSYrr,  X86::VMOVAPSYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
-    { X86::VMOVDQAYrr,  X86::VMOVDQAYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
     { X86::MOVPDI2DIrr, X86::MOVPDI2DImr,   TB_FOLDED_STORE },
     { X86::MOVPQIto64rr,X86::MOVPQI2QImr,   TB_FOLDED_STORE },
     { X86::MOVSDto64rr, X86::MOVSDto64mr,   TB_FOLDED_STORE },
     { X86::MOVSS2DIrr,  X86::MOVSS2DImr,    TB_FOLDED_STORE },
     { X86::MOVUPDrr,    X86::MOVUPDmr,      TB_FOLDED_STORE },
     { X86::MOVUPSrr,    X86::MOVUPSmr,      TB_FOLDED_STORE },
-    { X86::VMOVUPDYrr,  X86::VMOVUPDYmr,    TB_FOLDED_STORE },
-    { X86::VMOVUPSYrr,  X86::VMOVUPSYmr,    TB_FOLDED_STORE },
     { X86::MUL16r,      X86::MUL16m,        TB_FOLDED_LOAD },
     { X86::MUL32r,      X86::MUL32m,        TB_FOLDED_LOAD },
     { X86::MUL64r,      X86::MUL64m,        TB_FOLDED_LOAD },
@@ -354,7 +347,26 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::TEST16ri,    X86::TEST16mi,      TB_FOLDED_LOAD },
     { X86::TEST32ri,    X86::TEST32mi,      TB_FOLDED_LOAD },
     { X86::TEST64ri32,  X86::TEST64mi32,    TB_FOLDED_LOAD },
-    { X86::TEST8ri,     X86::TEST8mi,       TB_FOLDED_LOAD }
+    { X86::TEST8ri,     X86::TEST8mi,       TB_FOLDED_LOAD },
+    // AVX 128-bit versions of foldable instructions
+    { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr,  TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::FsVMOVAPDrr, X86::VMOVSDmr,      TB_FOLDED_STORE | TB_NO_REVERSE },
+    { X86::FsVMOVAPSrr, X86::VMOVSSmr,      TB_FOLDED_STORE | TB_NO_REVERSE },
+    { X86::VMOVAPDrr,   X86::VMOVAPDmr,     TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVAPSrr,   X86::VMOVAPSmr,     TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVDQArr,   X86::VMOVDQAmr,     TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr,  TB_FOLDED_STORE },
+    { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
+    { X86::VMOVSDto64rr,X86::VMOVSDto64mr,  TB_FOLDED_STORE },
+    { X86::VMOVSS2DIrr, X86::VMOVSS2DImr,   TB_FOLDED_STORE },
+    { X86::VMOVUPDrr,   X86::VMOVUPDmr,     TB_FOLDED_STORE },
+    { X86::VMOVUPSrr,   X86::VMOVUPSmr,     TB_FOLDED_STORE },
+    // AVX 256-bit foldable instructions
+    { X86::VMOVAPDYrr,  X86::VMOVAPDYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVAPSYrr,  X86::VMOVAPSYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVDQAYrr,  X86::VMOVDQAYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVUPDYrr,  X86::VMOVUPDYmr,    TB_FOLDED_STORE },
+    { X86::VMOVUPSYrr,  X86::VMOVUPSYmr,    TB_FOLDED_STORE }
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
@@ -382,8 +394,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm,         0 },
     { X86::FsMOVAPDrr,      X86::MOVSDrm,             TB_NO_REVERSE },
     { X86::FsMOVAPSrr,      X86::MOVSSrm,             TB_NO_REVERSE },
-    { X86::FsVMOVAPDrr,     X86::VMOVSDrm,            TB_NO_REVERSE },
-    { X86::FsVMOVAPSrr,     X86::VMOVSSrm,            TB_NO_REVERSE },
     { X86::IMUL16rri,       X86::IMUL16rmi,           0 },
     { X86::IMUL16rri8,      X86::IMUL16rmi8,          0 },
     { X86::IMUL32rri,       X86::IMUL32rmi,           0 },
@@ -414,8 +424,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm,     0 },
     { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm,       0 },
     { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm,       0 },
-    { X86::Int_VUCOMISDrr,  X86::Int_VUCOMISDrm,      0 },
-    { X86::Int_VUCOMISSrr,  X86::Int_VUCOMISSrm,      0 },
     { X86::MOV16rr,         X86::MOV16rm,             0 },
     { X86::MOV32rr,         X86::MOV32rm,             0 },
     { X86::MOV64rr,         X86::MOV64rm,             0 },
@@ -424,13 +432,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOV8rr,          X86::MOV8rm,              0 },
     { X86::MOVAPDrr,        X86::MOVAPDrm,            TB_ALIGN_16 },
     { X86::MOVAPSrr,        X86::MOVAPSrm,            TB_ALIGN_16 },
-    { X86::VMOVAPDYrr,      X86::VMOVAPDYrm,          TB_ALIGN_32 },
-    { X86::VMOVAPSYrr,      X86::VMOVAPSYrm,          TB_ALIGN_32 },
     { X86::MOVDDUPrr,       X86::MOVDDUPrm,           0 },
     { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm,         0 },
     { X86::MOVDI2SSrr,      X86::MOVDI2SSrm,          0 },
     { X86::MOVDQArr,        X86::MOVDQArm,            TB_ALIGN_16 },
-    { X86::VMOVDQAYrr,      X86::VMOVDQAYrm,          TB_ALIGN_16 },
     { X86::MOVSHDUPrr,      X86::MOVSHDUPrm,          TB_ALIGN_16 },
     { X86::MOVSLDUPrr,      X86::MOVSLDUPrm,          TB_ALIGN_16 },
     { X86::MOVSX16rr8,      X86::MOVSX16rm8,          0 },
@@ -441,8 +446,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOVSX64rr8,      X86::MOVSX64rm8,          0 },
     { X86::MOVUPDrr,        X86::MOVUPDrm,            TB_ALIGN_16 },
     { X86::MOVUPSrr,        X86::MOVUPSrm,            0 },
-    { X86::VMOVUPDYrr,      X86::VMOVUPDYrm,          0 },
-    { X86::VMOVUPSYrr,      X86::VMOVUPSYrm,          0 },
     { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm,        0 },
     { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm,        0 },
     { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm,     TB_ALIGN_16 },
@@ -477,8 +480,53 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
     { X86::UCOMISDrr,       X86::UCOMISDrm,           0 },
     { X86::UCOMISSrr,       X86::UCOMISSrm,           0 },
+    // AVX 128-bit versions of foldable instructions
+    { X86::Int_VCOMISDrr,   X86::Int_VCOMISDrm,       0 },
+    { X86::Int_VCOMISSrr,   X86::Int_VCOMISSrm,       0 },
+    { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm,     TB_ALIGN_16 },
+    { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm,     TB_ALIGN_16 },
+    { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm,     TB_ALIGN_16 },
+    { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm,     TB_ALIGN_16 },
+    { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm,     TB_ALIGN_16 },
+    { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm,     0 },
+    { X86::Int_VUCOMISDrr,  X86::Int_VUCOMISDrm,      0 },
+    { X86::Int_VUCOMISSrr,  X86::Int_VUCOMISSrm,      0 },
+    { X86::FsVMOVAPDrr,     X86::VMOVSDrm,            TB_NO_REVERSE },
+    { X86::FsVMOVAPSrr,     X86::VMOVSSrm,            TB_NO_REVERSE },
+    { X86::VMOV64toPQIrr,   X86::VMOVQI2PQIrm,        0 },
+    { X86::VMOV64toSDrr,    X86::VMOV64toSDrm,        0 },
+    { X86::VMOVAPDrr,       X86::VMOVAPDrm,           TB_ALIGN_16 },
+    { X86::VMOVAPSrr,       X86::VMOVAPSrm,           TB_ALIGN_16 },
+    { X86::VMOVDDUPrr,      X86::VMOVDDUPrm,          0 },
+    { X86::VMOVDI2PDIrr,    X86::VMOVDI2PDIrm,        0 },
+    { X86::VMOVDI2SSrr,     X86::VMOVDI2SSrm,         0 },
+    { X86::VMOVDQArr,       X86::VMOVDQArm,           TB_ALIGN_16 },
+    { X86::VMOVSLDUPrr,     X86::VMOVSLDUPrm,         TB_ALIGN_16 },
+    { X86::VMOVSHDUPrr,     X86::VMOVSHDUPrm,         TB_ALIGN_16 },
+    { X86::VMOVUPDrr,       X86::VMOVUPDrm,           TB_ALIGN_16 },
+    { X86::VMOVUPSrr,       X86::VMOVUPSrm,           0 },
+    { X86::VMOVZDI2PDIrr,   X86::VMOVZDI2PDIrm,       0 },
+    { X86::VMOVZQI2PQIrr,   X86::VMOVZQI2PQIrm,       0 },
+    { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm,    TB_ALIGN_16 },
+    { X86::VPSHUFDri,       X86::VPSHUFDmi,           TB_ALIGN_16 },
+    { X86::VPSHUFHWri,      X86::VPSHUFHWmi,          TB_ALIGN_16 },
+    { X86::VPSHUFLWri,      X86::VPSHUFLWmi,          TB_ALIGN_16 },
+    { X86::VRCPPSr,         X86::VRCPPSm,             TB_ALIGN_16 },
+    { X86::VRCPPSr_Int,     X86::VRCPPSm_Int,         TB_ALIGN_16 },
+    { X86::VRSQRTPSr,       X86::VRSQRTPSm,           TB_ALIGN_16 },
+    { X86::VRSQRTPSr_Int,   X86::VRSQRTPSm_Int,       TB_ALIGN_16 },
+    { X86::VSQRTPDr,        X86::VSQRTPDm,            TB_ALIGN_16 },
+    { X86::VSQRTPDr_Int,    X86::VSQRTPDm_Int,        TB_ALIGN_16 },
+    { X86::VSQRTPSr,        X86::VSQRTPSm,            TB_ALIGN_16 },
+    { X86::VSQRTPSr_Int,    X86::VSQRTPSm_Int,        TB_ALIGN_16 },
     { X86::VUCOMISDrr,      X86::VUCOMISDrm,          0 },
-    { X86::VUCOMISSrr,      X86::VUCOMISSrm,          0 }
+    { X86::VUCOMISSrr,      X86::VUCOMISSrm,          0 },
+    // AVX 256-bit foldable instructions
+    { X86::VMOVAPDYrr,      X86::VMOVAPDYrm,          TB_ALIGN_32 },
+    { X86::VMOVAPSYrr,      X86::VMOVAPSYrm,          TB_ALIGN_32 },
+    { X86::VMOVDQAYrr,      X86::VMOVDQAYrm,          TB_ALIGN_16 },
+    { X86::VMOVUPDYrr,      X86::VMOVUPDYrm,          0 },
+    { X86::VMOVUPSYrr,      X86::VMOVUPSYrm,          0 }
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -691,7 +739,155 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::XOR64rr,         X86::XOR64rm,       0 },
     { X86::XOR8rr,          X86::XOR8rm,        0 },
     { X86::XORPDrr,         X86::XORPDrm,       TB_ALIGN_16 },
-    { X86::XORPSrr,         X86::XORPSrm,       TB_ALIGN_16 }
+    { X86::XORPSrr,         X86::XORPSrm,       TB_ALIGN_16 },
+    // AVX 128-bit versions of foldable instructions
+    { X86::VCVTSD2SSrr,       X86::VCVTSD2SSrm,        0 },
+    { X86::Int_VCVTSD2SSrr,   X86::Int_VCVTSD2SSrm,    0 },
+    { X86::VCVTSI2SD64rr,     X86::VCVTSI2SD64rm,      0 },
+    { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm,  0 },
+    { X86::VCVTSI2SDrr,       X86::VCVTSI2SDrm,        0 },
+    { X86::Int_VCVTSI2SDrr,   X86::Int_VCVTSI2SDrm,    0 },
+    { X86::VCVTSI2SS64rr,     X86::VCVTSI2SS64rm,      0 },
+    { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm,  0 },
+    { X86::VCVTSI2SSrr,       X86::VCVTSI2SSrm,        0 },
+    { X86::Int_VCVTSI2SSrr,   X86::Int_VCVTSI2SSrm,    0 },
+    { X86::VCVTSS2SDrr,       X86::VCVTSS2SDrm,        0 },
+    { X86::Int_VCVTSS2SDrr,   X86::Int_VCVTSS2SDrm,    0 },
+    { X86::VCVTTSD2SI64rr,    X86::VCVTTSD2SI64rm,     0 },
+    { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
+    { X86::VCVTTSD2SIrr,      X86::VCVTTSD2SIrm,       0 },
+    { X86::Int_VCVTTSD2SIrr,  X86::Int_VCVTTSD2SIrm,   0 },
+    { X86::VCVTTSS2SI64rr,    X86::VCVTTSS2SI64rm,     0 },
+    { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
+    { X86::VCVTTSS2SIrr,      X86::VCVTTSS2SIrm,       0 },
+    { X86::Int_VCVTTSS2SIrr,  X86::Int_VCVTTSS2SIrm,   0 },
+    { X86::VCVTSD2SI64rr,     X86::VCVTSD2SI64rm,      0 },
+    { X86::VCVTSD2SIrr,       X86::VCVTSD2SIrm,        0 },
+    { X86::VCVTTPD2DQrr,      X86::VCVTTPD2DQrm,       TB_ALIGN_16 },
+    { X86::VCVTTPS2DQrr,      X86::VCVTTPS2DQrm,       TB_ALIGN_16 },
+    { X86::VRSQRTSSr,         X86::VRSQRTSSm,          0 },
+    { X86::VSQRTSDr,          X86::VSQRTSDm,           0 },
+    { X86::VSQRTSSr,          X86::VSQRTSSm,           0 },
+    { X86::VADDPDrr,          X86::VADDPDrm,           TB_ALIGN_16 },
+    { X86::VADDPSrr,          X86::VADDPSrm,           TB_ALIGN_16 },
+    { X86::VADDSDrr,          X86::VADDSDrm,           0 },
+    { X86::VADDSSrr,          X86::VADDSSrm,           0 },
+    { X86::VADDSUBPDrr,       X86::VADDSUBPDrm,        TB_ALIGN_16 },
+    { X86::VADDSUBPSrr,       X86::VADDSUBPSrm,        TB_ALIGN_16 },
+    { X86::VANDNPDrr,         X86::VANDNPDrm,          TB_ALIGN_16 },
+    { X86::VANDNPSrr,         X86::VANDNPSrm,          TB_ALIGN_16 },
+    { X86::VANDPDrr,          X86::VANDPDrm,           TB_ALIGN_16 },
+    { X86::VANDPSrr,          X86::VANDPSrm,           TB_ALIGN_16 },
+    { X86::VCMPPDrri,         X86::VCMPPDrmi,          TB_ALIGN_16 },
+    { X86::VCMPPSrri,         X86::VCMPPSrmi,          TB_ALIGN_16 },
+    { X86::VCMPSDrr,          X86::VCMPSDrm,           0 },
+    { X86::VCMPSSrr,          X86::VCMPSSrm,           0 },
+    { X86::VDIVPDrr,          X86::VDIVPDrm,           TB_ALIGN_16 },
+    { X86::VDIVPSrr,          X86::VDIVPSrm,           TB_ALIGN_16 },
+    { X86::VDIVSDrr,          X86::VDIVSDrm,           0 },
+    { X86::VDIVSSrr,          X86::VDIVSSrm,           0 },
+    { X86::VFsANDNPDrr,       X86::VFsANDNPDrm,        TB_ALIGN_16 },
+    { X86::VFsANDNPSrr,       X86::VFsANDNPSrm,        TB_ALIGN_16 },
+    { X86::VFsANDPDrr,        X86::VFsANDPDrm,         TB_ALIGN_16 },
+    { X86::VFsANDPSrr,        X86::VFsANDPSrm,         TB_ALIGN_16 },
+    { X86::VFsORPDrr,         X86::VFsORPDrm,          TB_ALIGN_16 },
+    { X86::VFsORPSrr,         X86::VFsORPSrm,          TB_ALIGN_16 },
+    { X86::VFsXORPDrr,        X86::VFsXORPDrm,         TB_ALIGN_16 },
+    { X86::VFsXORPSrr,        X86::VFsXORPSrm,         TB_ALIGN_16 },
+    { X86::VHADDPDrr,         X86::VHADDPDrm,          TB_ALIGN_16 },
+    { X86::VHADDPSrr,         X86::VHADDPSrm,          TB_ALIGN_16 },
+    { X86::VHSUBPDrr,         X86::VHSUBPDrm,          TB_ALIGN_16 },
+    { X86::VHSUBPSrr,         X86::VHSUBPSrm,          TB_ALIGN_16 },
+    { X86::Int_VCMPSDrr,      X86::Int_VCMPSDrm,       0 },
+    { X86::Int_VCMPSSrr,      X86::Int_VCMPSSrm,       0 },
+    { X86::VMAXPDrr,          X86::VMAXPDrm,           TB_ALIGN_16 },
+    { X86::VMAXPDrr_Int,      X86::VMAXPDrm_Int,       TB_ALIGN_16 },
+    { X86::VMAXPSrr,          X86::VMAXPSrm,           TB_ALIGN_16 },
+    { X86::VMAXPSrr_Int,      X86::VMAXPSrm_Int,       TB_ALIGN_16 },
+    { X86::VMAXSDrr,          X86::VMAXSDrm,           0 },
+    { X86::VMAXSDrr_Int,      X86::VMAXSDrm_Int,       0 },
+    { X86::VMAXSSrr,          X86::VMAXSSrm,           0 },
+    { X86::VMAXSSrr_Int,      X86::VMAXSSrm_Int,       0 },
+    { X86::VMINPDrr,          X86::VMINPDrm,           TB_ALIGN_16 },
+    { X86::VMINPDrr_Int,      X86::VMINPDrm_Int,       TB_ALIGN_16 },
+    { X86::VMINPSrr,          X86::VMINPSrm,           TB_ALIGN_16 },
+    { X86::VMINPSrr_Int,      X86::VMINPSrm_Int,       TB_ALIGN_16 },
+    { X86::VMINSDrr,          X86::VMINSDrm,           0 },
+    { X86::VMINSDrr_Int,      X86::VMINSDrm_Int,       0 },
+    { X86::VMINSSrr,          X86::VMINSSrm,           0 },
+    { X86::VMINSSrr_Int,      X86::VMINSSrm_Int,       0 },
+    { X86::VMULPDrr,          X86::VMULPDrm,           TB_ALIGN_16 },
+    { X86::VMULPSrr,          X86::VMULPSrm,           TB_ALIGN_16 },
+    { X86::VMULSDrr,          X86::VMULSDrm,           0 },
+    { X86::VMULSSrr,          X86::VMULSSrm,           0 },
+    { X86::VORPDrr,           X86::VORPDrm,            TB_ALIGN_16 },
+    { X86::VORPSrr,           X86::VORPSrm,            TB_ALIGN_16 },
+    { X86::VPACKSSDWrr,       X86::VPACKSSDWrm,        TB_ALIGN_16 },
+    { X86::VPACKSSWBrr,       X86::VPACKSSWBrm,        TB_ALIGN_16 },
+    { X86::VPACKUSWBrr,       X86::VPACKUSWBrm,        TB_ALIGN_16 },
+    { X86::VPADDBrr,          X86::VPADDBrm,           TB_ALIGN_16 },
+    { X86::VPADDDrr,          X86::VPADDDrm,           TB_ALIGN_16 },
+    { X86::VPADDQrr,          X86::VPADDQrm,           TB_ALIGN_16 },
+    { X86::VPADDSBrr,         X86::VPADDSBrm,          TB_ALIGN_16 },
+    { X86::VPADDSWrr,         X86::VPADDSWrm,          TB_ALIGN_16 },
+    { X86::VPADDWrr,          X86::VPADDWrm,           TB_ALIGN_16 },
+    { X86::VPANDNrr,          X86::VPANDNrm,           TB_ALIGN_16 },
+    { X86::VPANDrr,           X86::VPANDrm,            TB_ALIGN_16 },
+    { X86::VPCMPEQBrr,        X86::VPCMPEQBrm,         TB_ALIGN_16 },
+    { X86::VPCMPEQDrr,        X86::VPCMPEQDrm,         TB_ALIGN_16 },
+    { X86::VPCMPEQWrr,        X86::VPCMPEQWrm,         TB_ALIGN_16 },
+    { X86::VPCMPGTBrr,        X86::VPCMPGTBrm,         TB_ALIGN_16 },
+    { X86::VPCMPGTDrr,        X86::VPCMPGTDrm,         TB_ALIGN_16 },
+    { X86::VPCMPGTWrr,        X86::VPCMPGTWrm,         TB_ALIGN_16 },
+    { X86::VPINSRWrri,        X86::VPINSRWrmi,         TB_ALIGN_16 },
+    { X86::VPMADDWDrr,        X86::VPMADDWDrm,         TB_ALIGN_16 },
+    { X86::VPMAXSWrr,         X86::VPMAXSWrm,          TB_ALIGN_16 },
+    { X86::VPMAXUBrr,         X86::VPMAXUBrm,          TB_ALIGN_16 },
+    { X86::VPMINSWrr,         X86::VPMINSWrm,          TB_ALIGN_16 },
+    { X86::VPMINUBrr,         X86::VPMINUBrm,          TB_ALIGN_16 },
+    { X86::VPMULDQrr,         X86::VPMULDQrm,          TB_ALIGN_16 },
+    { X86::VPMULHUWrr,        X86::VPMULHUWrm,         TB_ALIGN_16 },
+    { X86::VPMULHWrr,         X86::VPMULHWrm,          TB_ALIGN_16 },
+    { X86::VPMULLDrr,         X86::VPMULLDrm,          TB_ALIGN_16 },
+    { X86::VPMULLWrr,         X86::VPMULLWrm,          TB_ALIGN_16 },
+    { X86::VPMULUDQrr,        X86::VPMULUDQrm,         TB_ALIGN_16 },
+    { X86::VPORrr,            X86::VPORrm,             TB_ALIGN_16 },
+    { X86::VPSADBWrr,         X86::VPSADBWrm,          TB_ALIGN_16 },
+    { X86::VPSLLDrr,          X86::VPSLLDrm,           TB_ALIGN_16 },
+    { X86::VPSLLQrr,          X86::VPSLLQrm,           TB_ALIGN_16 },
+    { X86::VPSLLWrr,          X86::VPSLLWrm,           TB_ALIGN_16 },
+    { X86::VPSRADrr,          X86::VPSRADrm,           TB_ALIGN_16 },
+    { X86::VPSRAWrr,          X86::VPSRAWrm,           TB_ALIGN_16 },
+    { X86::VPSRLDrr,          X86::VPSRLDrm,           TB_ALIGN_16 },
+    { X86::VPSRLQrr,          X86::VPSRLQrm,           TB_ALIGN_16 },
+    { X86::VPSRLWrr,          X86::VPSRLWrm,           TB_ALIGN_16 },
+    { X86::VPSUBBrr,          X86::VPSUBBrm,           TB_ALIGN_16 },
+    { X86::VPSUBDrr,          X86::VPSUBDrm,           TB_ALIGN_16 },
+    { X86::VPSUBSBrr,         X86::VPSUBSBrm,          TB_ALIGN_16 },
+    { X86::VPSUBSWrr,         X86::VPSUBSWrm,          TB_ALIGN_16 },
+    { X86::VPSUBWrr,          X86::VPSUBWrm,           TB_ALIGN_16 },
+    { X86::VPUNPCKHBWrr,      X86::VPUNPCKHBWrm,       TB_ALIGN_16 },
+    { X86::VPUNPCKHDQrr,      X86::VPUNPCKHDQrm,       TB_ALIGN_16 },
+    { X86::VPUNPCKHQDQrr,     X86::VPUNPCKHQDQrm,      TB_ALIGN_16 },
+    { X86::VPUNPCKHWDrr,      X86::VPUNPCKHWDrm,       TB_ALIGN_16 },
+    { X86::VPUNPCKLBWrr,      X86::VPUNPCKLBWrm,       TB_ALIGN_16 },
+    { X86::VPUNPCKLDQrr,      X86::VPUNPCKLDQrm,       TB_ALIGN_16 },
+    { X86::VPUNPCKLQDQrr,     X86::VPUNPCKLQDQrm,      TB_ALIGN_16 },
+    { X86::VPUNPCKLWDrr,      X86::VPUNPCKLWDrm,       TB_ALIGN_16 },
+    { X86::VPXORrr,           X86::VPXORrm,            TB_ALIGN_16 },
+    { X86::VSHUFPDrri,        X86::VSHUFPDrmi,         TB_ALIGN_16 },
+    { X86::VSHUFPSrri,        X86::VSHUFPSrmi,         TB_ALIGN_16 },
+    { X86::VSUBPDrr,          X86::VSUBPDrm,           TB_ALIGN_16 },
+    { X86::VSUBPSrr,          X86::VSUBPSrm,           TB_ALIGN_16 },
+    { X86::VSUBSDrr,          X86::VSUBSDrm,           0 },
+    { X86::VSUBSSrr,          X86::VSUBSSrm,           0 },
+    { X86::VUNPCKHPDrr,       X86::VUNPCKHPDrm,        TB_ALIGN_16 },
+    { X86::VUNPCKHPSrr,       X86::VUNPCKHPSrm,        TB_ALIGN_16 },
+    { X86::VUNPCKLPDrr,       X86::VUNPCKLPDrm,        TB_ALIGN_16 },
+    { X86::VUNPCKLPSrr,       X86::VUNPCKLPSrm,        TB_ALIGN_16 },
+    { X86::VXORPDrr,          X86::VXORPDrm,           TB_ALIGN_16 },
+    { X86::VXORPSrr,          X86::VXORPSrm,           TB_ALIGN_16 }
+    // FIXME: add AVX 256-bit foldable instructions
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -804,6 +1000,11 @@ static bool isFrameLoadOpcode(int Opcode) {
   case X86::MOVAPSrm:
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
+  case X86::VMOVSSrm:
+  case X86::VMOVSDrm:
+  case X86::VMOVAPSrm:
+  case X86::VMOVAPDrm:
+  case X86::VMOVDQArm:
   case X86::VMOVAPSYrm:
   case X86::VMOVAPDYrm:
   case X86::VMOVDQAYrm:
@@ -828,6 +1029,11 @@ static bool isFrameStoreOpcode(int Opcode) {
   case X86::MOVAPSmr:
   case X86::MOVAPDmr:
   case X86::MOVDQAmr:
+  case X86::VMOVSSmr:
+  case X86::VMOVSDmr:
+  case X86::VMOVAPSmr:
+  case X86::VMOVAPDmr:
+  case X86::VMOVDQAmr:
   case X86::VMOVAPSYmr:
   case X86::VMOVAPDYmr:
   case X86::VMOVDQAYmr:
@@ -913,6 +1119,12 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
     case X86::MOVUPSrm:
     case X86::MOVAPDrm:
     case X86::MOVDQArm:
+    case X86::VMOVSSrm:
+    case X86::VMOVSDrm:
+    case X86::VMOVAPSrm:
+    case X86::VMOVUPSrm:
+    case X86::VMOVAPDrm:
+    case X86::VMOVDQArm:
     case X86::VMOVAPSYrm:
     case X86::VMOVUPSYrm:
     case X86::VMOVAPDYrm:
@@ -1922,7 +2134,8 @@ static bool isHReg(unsigned Reg) {
 }
 
 // Try and copy between VR128/VR64 and GR64 registers.
-static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
+                                        bool HasAVX) {
   // SrcReg(VR128) -> DestReg(GR64)
   // SrcReg(VR64)  -> DestReg(GR64)
   // SrcReg(GR64)  -> DestReg(VR128)
@@ -1931,7 +2144,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
   if (X86::GR64RegClass.contains(DestReg)) {
     if (X86::VR128RegClass.contains(SrcReg)) {
       // Copy from a VR128 register to a GR64 register.
-      return X86::MOVPQIto64rr;
+      return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
     } else if (X86::VR64RegClass.contains(SrcReg)) {
       // Copy from a VR64 register to a GR64 register.
       return X86::MOVSDto64rr;
@@ -1939,7 +2152,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
   } else if (X86::GR64RegClass.contains(SrcReg)) {
     // Copy from a GR64 register to a VR128 register.
     if (X86::VR128RegClass.contains(DestReg))
-      return X86::MOV64toPQIrr;
+      return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
     // Copy from a GR64 register to a VR64 register.
     else if (X86::VR64RegClass.contains(DestReg))
       return X86::MOV64toSDrr;
@@ -1953,6 +2166,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                unsigned DestReg, unsigned SrcReg,
                                bool KillSrc) const {
   // First deal with the normal symmetric copies.
+  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
   unsigned Opc = 0;
   if (X86::GR64RegClass.contains(DestReg, SrcReg))
     Opc = X86::MOV64rr;
@@ -1969,14 +2183,13 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     else
       Opc = X86::MOV8rr;
   } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
-    Opc = TM.getSubtarget<X86Subtarget>().hasAVX() ?
-          X86::VMOVAPSrr : X86::MOVAPSrr;
+    Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
   else if (X86::VR256RegClass.contains(DestReg, SrcReg))
     Opc = X86::VMOVAPSYrr;
   else if (X86::VR64RegClass.contains(DestReg, SrcReg))
     Opc = X86::MMX_MOVQ64rr;
   else
-    Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
+    Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX);
 
   if (Opc) {
     BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -2020,6 +2233,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
                                       bool isStackAligned,
                                       const TargetMachine &TM,
                                       bool load) {
+  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
   switch (RC->getSize()) {
   default:
     llvm_unreachable("Unknown spill size");
@@ -2038,7 +2252,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
     if (X86::GR32RegClass.hasSubClassEq(RC))
       return load ? X86::MOV32rm : X86::MOV32mr;
     if (X86::FR32RegClass.hasSubClassEq(RC))
-      return load ? X86::MOVSSrm : X86::MOVSSmr;
+      return load ?
+        (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
+        (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
     if (X86::RFP32RegClass.hasSubClassEq(RC))
       return load ? X86::LD_Fp32m : X86::ST_Fp32m;
     llvm_unreachable("Unknown 4-byte regclass");
@@ -2046,7 +2262,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
     if (X86::GR64RegClass.hasSubClassEq(RC))
       return load ? X86::MOV64rm : X86::MOV64mr;
     if (X86::FR64RegClass.hasSubClassEq(RC))
-      return load ? X86::MOVSDrm : X86::MOVSDmr;
+      return load ?
+        (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
+        (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
     if (X86::VR64RegClass.hasSubClassEq(RC))
       return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
     if (X86::RFP64RegClass.hasSubClassEq(RC))
@@ -2057,7 +2275,6 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
     return load ? X86::LD_Fp80m : X86::ST_FpP80m;
   case 16: {
     assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
-    bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
     // If stack is realigned we can use aligned stores.
     if (isStackAligned)
       return load ?
@@ -2101,7 +2318,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   const MachineFunction &MF = *MBB.getParent();
   assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
          "Stack slot too small for store");
-  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+  unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
     RI.canRealignStack(MF);
   unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
   DebugLoc DL = MBB.findDebugLoc(MI);
@@ -2116,7 +2334,9 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
                                   MachineInstr::mmo_iterator MMOBegin,
                                   MachineInstr::mmo_iterator MMOEnd,
                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+  unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+  bool isAligned = MMOBegin != MMOEnd &&
+                   (*MMOBegin)->getAlignment() >= Alignment;
   unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
   DebugLoc DL;
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -2134,7 +2354,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
-  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+  unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
    RI.canRealignStack(MF);
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
   DebugLoc DL = MBB.findDebugLoc(MI);
@@ -2147,7 +2368,9 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                  MachineInstr::mmo_iterator MMOBegin,
                                  MachineInstr::mmo_iterator MMOEnd,
                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+  unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+  bool isAligned = MMOBegin != MMOEnd &&
+                   (*MMOBegin)->getAlignment() >= Alignment;
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
   DebugLoc DL;
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2768,7 +2991,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
         !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
       // Do not introduce a slow unaligned load.
       return false;
-    bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+    unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+    bool isAligned = (*MMOs.first) &&
+                     (*MMOs.first)->getAlignment() >= Alignment;
     Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
                               VT, MVT::Other, &AddrOps[0], AddrOps.size());
     NewNodes.push_back(Load);
@@ -2810,7 +3035,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
         !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
       // Do not introduce a slow unaligned store.
       return false;
-    bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+    unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+    bool isAligned = (*MMOs.first) &&
+                     (*MMOs.first)->getAlignment() >= Alignment;
     SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
                                                          isAligned, TM),
                                        dl, MVT::Other,
@@ -2871,6 +3098,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
   case X86::MOVDQUrm:
+  case X86::VMOVAPSrm:
+  case X86::VMOVUPSrm:
+  case X86::VMOVAPDrm:
+  case X86::VMOVDQArm:
+  case X86::VMOVDQUrm:
   case X86::VMOVAPSYrm:
   case X86::VMOVUPSYrm:
   case X86::VMOVAPDYrm:
@@ -2900,6 +3132,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
   case X86::MOVDQUrm:
+  case X86::VMOVAPSrm:
+  case X86::VMOVUPSrm:
+  case X86::VMOVAPDrm:
+  case X86::VMOVDQArm:
+  case X86::VMOVDQUrm:
   case X86::VMOVAPSYrm:
   case X86::VMOVUPSYrm:
   case X86::VMOVAPDYrm:
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index fed7d35f025..6b350a00390 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1799,16 +1799,15 @@ let Predicates = [HasAVX] in {
             (VCVTTPS2DQYrr VR256:$src)>;
 }
 
-def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
-                            (ins VR128:$src),
-                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>,
-                       VEX;
-def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
-                          (ins f128mem:$src),
-                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
-                          [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
-                                             (memop addr:$src)))]>, VEX;
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst,
+                              (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX;
+let isCodeGenOnly = 1 in
+def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+                                               (memop addr:$src)))]>, VEX;
 def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -1820,8 +1819,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
 // The assembler can recognize rr 256-bit instructions by seeing a ymm
 // register, but the same isn't true when using memory operands instead.
 // Provide other assembly rr and rm forms to address this explicitly.
-def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                        "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                           "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 
@@ -4174,10 +4171,20 @@ def MOVPDI2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                        [(store (i32 (vector_extract (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)]>;
 
-def MOVPQIto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
-                         "mov{d|q}\t{$src, $dst|$dst, $src}",
-                         [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
-                                           (iPTR 0)))]>;
+//===---------------------------------------------------------------------===//
+// Move Packed Doubleword Int first element to Doubleword Int
+//
+let isCodeGenOnly = 1 in
+def VMOVPQIto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                          "mov{d|q}\t{$src, $dst|$dst, $src}",
+                          [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+                                                           (iPTR 0)))]>;
+
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
+                        [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+                                                         (iPTR 0)))]>;
+
 //===---------------------------------------------------------------------===//
 // Bitcast FR64 <-> GR64
 //
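
The stricter alignment check mentioned in the commit message reduces to the `RC->getSize() == 32 ? 32 : 16` pattern repeated in the spill/reload helpers above. A minimal sketch of that rule, using simplified stand-in parameters rather than LLVM's MachineFunction and TargetRegisterClass types:

```cpp
// Minimal sketch of the stricter check: 32-byte register classes
// (256-bit YMM spills) now require 32-byte alignment, while everything
// else keeps the previous 16-byte threshold. Parameter names are
// simplified stand-ins for the LLVM objects used in X86InstrInfo.cpp.
#include <iostream>

bool isSpillSlotAligned(unsigned RegClassSizeBytes, unsigned StackAlignBytes,
                        bool CanRealignStack) {
  unsigned Needed = (RegClassSizeBytes == 32) ? 32 : 16;
  return StackAlignBytes >= Needed || CanRealignStack;
}

int main() {
  // A 16-byte-aligned stack is still fine for an XMM spill, but no
  // longer counts as aligned for a YMM (32-byte) spill.
  std::cout << isSpillSlotAligned(16, 16, false) << ' '   // 1
            << isSpillSlotAligned(32, 16, false) << '\n'; // 0
}
```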

