author     Craig Topper <craig.topper@gmail.com>    2016-09-30 04:31:33 +0000
committer  Craig Topper <craig.topper@gmail.com>    2016-09-30 04:31:33 +0000
commit     bc6e97b8f43bc1d8340632947bab3cade6186df1 (patch)
tree       cd8288173b3f079735e35993c4c5ab423afa87d7
parent     f57cc62abf4d2d5fec91716372d060662d678fc8 (diff)
download   bcm5719-llvm-bc6e97b8f43bc1d8340632947bab3cade6186df1.tar.gz
           bcm5719-llvm-bc6e97b8f43bc1d8340632947bab3cade6186df1.zip
[AVX-512] Always use the full 32 register vector classes for addRegisterClass regardless of whether AVX512/VLX is enabled or not.
If AVX512 is disabled, the registers should already be marked reserved. Pattern
predicates and register classes on instructions should take care of most of the
rest. Loads/stores and physical register copies for XMM16-31 and YMM16-31
without VLX have already been taken care of.

I'm a little unclear why this changed the register allocation of the SSE2 run of
the sad.ll test, but the registers selected appear to be valid after this change.

llvm-svn: 282835
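In practice the change just drops the subtarget-based ternaries and always passes
the EVEX-extended register class. A minimal sketch of the pattern, inside the
X86TargetLowering constructor in X86ISelLowering.cpp, using the existing
addRegisterClass(MVT, const TargetRegisterClass *) helper; the f32/v4f32/v8f32
lines are representative, and the patch applies the same rewrite to every
affected type:

    // Before: pick the register class based on the subtarget.
    addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
                                                      : &X86::FR32RegClass);
    // After: always use the full 32-register (EVEX) classes. When AVX512/VLX
    // is unavailable, XMM16-31 and YMM16-31 are marked reserved, so the
    // register allocator still only hands out XMM0-15/YMM0-15.
    addRegisterClass(MVT::f32,   &X86::FR32XRegClass);   // scalar SSE float
    addRegisterClass(MVT::v4f32, &X86::VR128XRegClass);  // 128-bit vectors
    addRegisterClass(MVT::v8f32, &X86::VR256XRegClass);  // 256-bit vectors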
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp            |  45
-rw-r--r--  llvm/test/CodeGen/X86/sad.ll                       |  16
-rw-r--r--  llvm/test/CodeGen/X86/vector-half-conversions.ll   | 118
3 files changed, 81 insertions(+), 98 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 13a30822954..7b3f2f29d18 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -485,10 +485,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
- addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
- : &X86::FR32RegClass);
- addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
- : &X86::FR64RegClass);
+ addRegisterClass(MVT::f32, &X86::FR32XRegClass);
+ addRegisterClass(MVT::f64, &X86::FR64XRegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
@@ -517,8 +515,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
} else if (UseX87 && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
- addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
- : &X86::FR32RegClass);
+ addRegisterClass(MVT::f32, &X86::FR32XRegClass);
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
@@ -721,8 +718,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
- addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
+ addRegisterClass(MVT::v4f32, &X86::VR128XRegClass);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
@@ -735,19 +731,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
- addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
+ addRegisterClass(MVT::v2f64, &X86::VR128XRegClass);
// FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
- addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
- addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
- addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
- addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
+ addRegisterClass(MVT::v16i8, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v8i16, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v4i32, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v2i64, &X86::VR128XRegClass);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
@@ -955,18 +946,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
bool HasInt256 = Subtarget.hasInt256();
- addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
+ addRegisterClass(MVT::v32i8, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v16i16, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v8i32, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v8f32, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v4i64, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v4f64, &X86::VR256XRegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll
index 07c07485c88..4c57d3bec8a 100644
--- a/llvm/test/CodeGen/X86/sad.ll
+++ b/llvm/test/CodeGen/X86/sad.ll
@@ -155,12 +155,12 @@ define i32 @sad_32i8() nounwind {
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm13, %xmm13
; SSE2-NEXT: pxor %xmm15, %xmm15
-; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: pxor %xmm14, %xmm14
+; SSE2-NEXT: pxor %xmm6, %xmm6
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB1_1: # %vector.body
; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) # 16-byte Spill
; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) # 16-byte Spill
; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill
; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill
@@ -252,11 +252,9 @@ define i32 @sad_32i8() nounwind {
; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; SSE2-NEXT: paddd %xmm3, %xmm4
; SSE2-NEXT: paddd %xmm6, %xmm0
-; SSE2-NEXT: paddd %xmm7, %xmm14
-; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
-; SSE2-NEXT: paddd %xmm5, %xmm3
-; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill
-; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload
+; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload
+; SSE2-NEXT: paddd %xmm7, %xmm6
+; SSE2-NEXT: paddd %xmm5, %xmm14
; SSE2-NEXT: paddd %xmm8, %xmm1
; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
; SSE2-NEXT: paddd %xmm2, %xmm3
@@ -266,9 +264,9 @@ define i32 @sad_32i8() nounwind {
; SSE2-NEXT: jne .LBB1_1
; SSE2-NEXT: # BB#2: # %middle.block
; SSE2-NEXT: paddd %xmm15, %xmm4
-; SSE2-NEXT: paddd %xmm14, %xmm1
+; SSE2-NEXT: paddd %xmm6, %xmm1
; SSE2-NEXT: paddd %xmm13, %xmm0
-; SSE2-NEXT: paddd %xmm5, %xmm2
+; SSE2-NEXT: paddd %xmm14, %xmm2
; SSE2-NEXT: paddd %xmm4, %xmm1
; SSE2-NEXT: paddd %xmm2, %xmm1
; SSE2-NEXT: paddd %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index 57705ad7101..d64b37c2ffc 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -3350,69 +3350,69 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind {
;
; AVX512F-LABEL: cvt_16f32_to_16i16:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm2
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm14
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm3, %ymm3
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[3,1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm4, %ymm4
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm5, %ymm5
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm7
+; AVX512F-NEXT: vextractf128 $1, %ymm7, %xmm8
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm6 = xmm8[3,1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm6, %ymm6
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm9 = xmm8[1,0]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm9, %ymm9
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm10 = xmm8[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm10, %ymm10
+; AVX512F-NEXT: vcvtps2ph $4, %zmm8, %ymm8
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm11 = xmm7[3,1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm11, %ymm11
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm12 = xmm7[1,0]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm12, %ymm12
+; AVX512F-NEXT: vcvtps2ph $4, %zmm7, %ymm13
+; AVX512F-NEXT: vmovd %xmm13, %eax
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm7 = xmm7[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm7, %ymm7
+; AVX512F-NEXT: vmovd %eax, %xmm2
+; AVX512F-NEXT: vmovd %xmm7, %eax
+; AVX512F-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm12, %eax
+; AVX512F-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm11, %eax
+; AVX512F-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm8, %eax
+; AVX512F-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm10, %eax
+; AVX512F-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm9, %eax
+; AVX512F-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm6, %eax
+; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm6
+; AVX512F-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm6, %eax
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
+; AVX512F-NEXT: vmovd %eax, %xmm6
+; AVX512F-NEXT: vmovd %xmm0, %eax
+; AVX512F-NEXT: vpinsrw $1, %eax, %xmm6, %xmm0
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm4
+; AVX512F-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm3, %eax
+; AVX512F-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm14, %eax
; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm1
-; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3
+; AVX512F-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm0
-; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $6, %eax, %xmm3, %xmm1
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX512F-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;