diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-16 20:15:30 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-16 20:15:30 +0000 |
| commit | 1b69fd275d589f48ce63bea73e311b7ef89c99ba (patch) | |
| tree | bb2775b59c9c64710bb7ed48d30b21267c807a61 /llvm/lib/Target | |
| parent | b157dcacb5b96fd64900906911832a8ac3bb189e (diff) | |
| download | bcm5719-llvm-1b69fd275d589f48ce63bea73e311b7ef89c99ba.tar.gz bcm5719-llvm-1b69fd275d589f48ce63bea73e311b7ef89c99ba.zip | |
AMDGPU/GlobalISel: Select G_SHL
I think this manages to not break the DAG handling with the divergent
predicates because the stadalone divergent patterns end up with a
higher priority than the pattern on the instruction definition.
The 16-bit versions don't work yet.
llvm-svn: 366254
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 2 |
3 files changed, 4 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index f46bee12604..b762b84d9ca 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -511,10 +511,10 @@ let AddedComplexity = 1 in { let Defs = [SCC] in { // TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3 def S_LSHL_B32 : SOP2_32 <"s_lshl_b32", - [(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))] + [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64", - [(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))] + [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B32 : SOP2_32 <"s_lshr_b32", [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))] diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index fa9b913c2de..260e8a498fb 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -474,7 +474,7 @@ defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>; defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>; defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">; defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">; -defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">; +defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">; defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>; defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>; defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index f7699e61d59..6ebb9557c3c 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -393,7 +393,7 @@ def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; } // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10] let SubtargetPredicate = isGFX8Plus in { -def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>; +def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>; def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>>; def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>; } // End SubtargetPredicate = isGFX8Plus |

