author     Craig Topper <craig.topper@gmail.com>    2016-06-02 04:19:45 +0000
committer  Craig Topper <craig.topper@gmail.com>    2016-06-02 04:19:45 +0000
commit     ca9c0801e1040b91b2732547f78e983de715598e (patch)
tree       0e0d971d5e44c630f4f29c210985b4f5314a708b
parent     6611188633b2f90e83d92f09ffa1e6edd976c00d (diff)
[X86] Add AVX 256-bit loads and stores to fast isel.
I'm not sure why this was missing for so long. This also exposed that normal isel for AVX1 was picking the floating-point 256-bit VMOVNTPS for some integer types even though VMOVNTDQ is available. In practice it doesn't matter, thanks to the execution dependency fix pass, but it did require extra isel patterns. That will be fixed in a follow-up commit.

llvm-svn: 271481
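To make the fix concrete, here is a minimal IR sketch in the spirit of the test cases below; the function name, metadata id, and llc flags are illustrative assumptions, not copied from fast-isel-nontemporal.ll. Under fast isel on an AVX1 target (e.g. llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -O0), an aligned nontemporal <8 x i32> store like this can now be selected directly as vmovntdq, instead of falling back to SelectionDAG, whose AVX1 patterns used to pick the floating-point vmovntps:

define void @nt_store_8xi32(<8 x i32>* %ptr, <8 x i32> %v) {
entry:
  ; 32-byte alignment lets fast isel pick the aligned 256-bit form;
  ; the !nontemporal metadata requests the streaming store.
  store <8 x i32> %v, <8 x i32>* %ptr, align 32, !nontemporal !1
  ret void
}
!1 = !{i32 1}

The Alignment >= 32 threshold in the patch mirrors the existing 16-byte check for the 128-bit cases: the aligned VMOVAPS/VMOVAPD/VMOVDQA forms fault on under-aligned addresses, so the unaligned VMOVUPS*/VMOVUPD*/VMOVDQU* opcodes are used otherwise.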
-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp             | 61
-rw-r--r--  llvm/test/CodeGen/X86/fast-isel-nontemporal.ll  | 18
2 files changed, 61 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index fd4246fcfa9..4aa68295691 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -348,6 +348,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
MachineMemOperand *MMO, unsigned &ResultReg,
unsigned Alignment) {
+ bool HasAVX = Subtarget->hasAVX();
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@@ -373,7 +374,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
break;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ Opc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
RC = &X86::FR32RegClass;
} else {
Opc = X86::LD_Fp32m;
@@ -382,7 +383,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ Opc = HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
RC = &X86::FR64RegClass;
} else {
Opc = X86::LD_Fp64m;
@@ -394,16 +395,16 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
return false;
case MVT::v4f32:
if (Alignment >= 16)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ Opc = HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ Opc = HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
RC = &X86::VR128RegClass;
break;
case MVT::v2f64:
if (Alignment >= 16)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ Opc = HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ Opc = HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
RC = &X86::VR128RegClass;
break;
case MVT::v4i32:
@@ -411,11 +412,29 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
if (Alignment >= 16)
- Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+ Opc = HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
else
- Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ Opc = HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
RC = &X86::VR128RegClass;
break;
+ case MVT::v8f32:
+ assert(HasAVX);
+ Opc = (Alignment >= 32) ? X86::VMOVAPSYrm : X86::VMOVUPSYrm;
+ RC = &X86::VR256RegClass;
+ break;
+ case MVT::v4f64:
+ assert(HasAVX);
+ Opc = (Alignment >= 32) ? X86::VMOVAPDYrm : X86::VMOVUPDYrm;
+ RC = &X86::VR256RegClass;
+ break;
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v16i16:
+ case MVT::v32i8:
+ assert(HasAVX);
+ Opc = (Alignment >= 32) ? X86::VMOVDQAYrm : X86::VMOVDQUYrm;
+ RC = &X86::VR256RegClass;
+ break;
}
ResultReg = createResultReg(RC);
@@ -508,7 +527,31 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
else
Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
} else
- Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ break;
+ case MVT::v8f32:
+ assert(HasAVX);
+ if (Aligned)
+ Opc = IsNonTemporal ? X86::VMOVNTPSYmr : X86::VMOVAPSYmr;
+ else
+ Opc = X86::VMOVUPSYmr;
+ break;
+ case MVT::v4f64:
+ assert(HasAVX);
+ if (Aligned)
+ Opc = IsNonTemporal ? X86::VMOVNTPDYmr : X86::VMOVAPDYmr;
+ else
+ Opc = X86::VMOVUPDYmr;
+ break;
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v16i16:
+ case MVT::v32i8:
+ assert(HasAVX);
+ if (Aligned)
+ Opc = IsNonTemporal ? X86::VMOVNTDQYmr : X86::VMOVDQAYmr;
+ else
+ Opc = X86::VMOVDQUYmr;
break;
}
diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
index 481b9789934..da82a7e3892 100644
--- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -260,7 +260,7 @@ define void @test_nt32xi8(<32 x i8>* nocapture %ptr, <32 x i8> %X) {
;
; AVX-LABEL: test_nt32xi8:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -282,7 +282,7 @@ define void @test_nt16xi16(<16 x i16>* nocapture %ptr, <16 x i16> %X) {
;
; AVX-LABEL: test_nt16xi16:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -304,7 +304,7 @@ define void @test_nt8xi32(<8 x i32>* nocapture %ptr, <8 x i32> %X) {
;
; AVX-LABEL: test_nt8xi32:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -404,8 +404,8 @@ define void @test_nt64xi8(<64 x i8>* nocapture %ptr, <64 x i8> %X) {
;
; AVX-LABEL: test_nt64xi8:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
-; AVX-NEXT: vmovntps %ymm1, 32(%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -435,8 +435,8 @@ define void @test_nt32xi16(<32 x i16>* nocapture %ptr, <32 x i16> %X) {
;
; AVX-LABEL: test_nt32xi16:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
-; AVX-NEXT: vmovntps %ymm1, 32(%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -466,8 +466,8 @@ define void @test_nt16xi32(<16 x i32>* nocapture %ptr, <16 x i32> %X) {
;
; AVX-LABEL: test_nt16xi32:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
-; AVX-NEXT: vmovntps %ymm1, 32(%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;