diff options
| field | value | date |
|---|---|---|
| author | Craig Topper <craig.topper@intel.com> | 2018-07-10 00:49:49 +0000 |
| committer | Craig Topper <craig.topper@intel.com> | 2018-07-10 00:49:49 +0000 |
| commit | 866a377e91bed6f4ee4a92a13ae0c18ef9d6c94a (patch) | |
| tree | 7e6cb2d724167c6b5fa45c3bb0a43877150e4440 | |
| parent | 59fd2f4c5257ba52a5d446935348ea6e70b38980 (diff) | |
| download | bcm5719-llvm-866a377e91bed6f4ee4a92a13ae0c18ef9d6c94a.tar.gz bcm5719-llvm-866a377e91bed6f4ee4a92a13ae0c18ef9d6c94a.zip | |
[X86] Correct vfixupimm load patterns to look for an integer load, not a floating point load bitcasted to integer.
DAG combine never lets a floating-point load that is bitcast to integer survive; it is always turned into a plain integer load, so the patterns must match the integer load directly.
llvm-svn: 336626
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 41 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 6 |
2 files changed, 22 insertions, 25 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 1b29c9896b3..6830de62969 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -90,12 +90,6 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc, !cast<ComplexPattern>("sse_load_f64"), ?)); - ValueType IntVT = !cast<ValueType>( - !if (!eq (!srl(EltSize,5),0), - VTName, - !if (!eq(TypeVariantName, "f"), - "v" # NumElts # "i" # EltSize, - VTName))); // The string to specify embedded broadcast in assembly. string BroadcastStr = "{1to" # NumElts # "}"; @@ -11364,14 +11358,15 @@ let Predicates = [HasVLX] in { //===----------------------------------------------------------------------===// multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, X86VectorVTInfo _>{ + X86FoldableSchedWrite sched, X86VectorVTInfo _, + X86VectorVTInfo TblVT>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (_.IntVT _.RC:$src3), + (TblVT.VT _.RC:$src3), (i32 imm:$src4), (i32 FROUND_CURRENT))>, Sched<[sched]>; defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -11379,7 +11374,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (_.IntVT (bitconvert (_.LdFrag addr:$src3))), + (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), (i32 imm:$src4), (i32 FROUND_CURRENT))>, Sched<[sched.Folded, ReadAfterLd]>; @@ -11389,7 +11384,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, "$src2, ${src3}"##_.BroadcastStr##", $src4", (OpNode (_.VT _.RC:$src1), 
(_.VT _.RC:$src2), - (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), + (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))), (i32 imm:$src4), (i32 FROUND_CURRENT))>, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; @@ -11398,7 +11393,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, - X86VectorVTInfo _>{ + X86VectorVTInfo _, X86VectorVTInfo TblVT>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), @@ -11406,7 +11401,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { "$src2, $src3, {sae}, $src4", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (_.IntVT _.RC:$src3), + (TblVT.VT _.RC:$src3), (i32 imm:$src4), (i32 FROUND_NO_EXC))>, EVEX_B, Sched<[sched]>; @@ -11450,17 +11445,21 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, - AVX512VLVectorVTInfo _Vec> { + AVX512VLVectorVTInfo _Vec, + AVX512VLVectorVTInfo _Tbl> { let Predicates = [HasAVX512] in defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM, - _Vec.info512>, + _Vec.info512, _Tbl.info512>, avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM, - _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512; + _Vec.info512, _Tbl.info512>, AVX512AIi8Base, + EVEX_4V, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM, - _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128; + _Vec.info128, _Tbl.info128>, AVX512AIi8Base, + EVEX_4V, EVEX_V128; defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM, - _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256; + _Vec.info256, _Tbl.info256>, AVX512AIi8Base, + 
EVEX_4V, EVEX_V256; } } @@ -11470,10 +11469,10 @@ defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; -defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info>, - EVEX_CD8<32, CD8VF>; -defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info>, - EVEX_CD8<64, CD8VF>, VEX_W; +defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info, + avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, + avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; // Patterns used to select SSE scalar fp arithmetic instructions from // either: diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 0cbe96e5998..bca1326bd8d 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -4162,8 +4162,7 @@ define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, < define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512_load(<8 x double> %x0, <8 x double> %x1, <8 x i64>* %x2ptr) { ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512_load: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovapd (%rdi), %zmm2 -; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfixupimmpd $3, (%rdi), %zmm1, %zmm0 ; CHECK-NEXT: retq %x2 = load <8 x i64>, <8 x i64>* %x2ptr %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 4) @@ -4265,8 +4264,7 @@ define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, < define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512_load(<16 x float> %x0, <16 x float> %x1, <16 x i32>* %x2ptr) { ; CHECK-LABEL: 
test_int_x86_avx512_mask_fixupimm_ps_512_load: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovaps (%rdi), %zmm2 -; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfixupimmps $5, (%rdi), %zmm1, %zmm0 ; CHECK-NEXT: retq %x2 = load <16 x i32>, <16 x i32>* %x2ptr %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4) |

