-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp            | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td          |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp   | 10
-rw-r--r--  llvm/test/CodeGen/AMDGPU/ctlz.ll                  |  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fneg-fabs.ll             | 15
-rw-r--r--  llvm/test/CodeGen/AMDGPU/half.ll                  |  4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll |  4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/or.ll                    |  6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/s_movk_i32.ll            | 16
-rw-r--r--  llvm/test/CodeGen/AMDGPU/si-literal-folding.ll    |  9
10 files changed, 43 insertions, 39 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c84847f2e0e..5e0d34d8498 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1852,13 +1852,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
   case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
   case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
   case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
-  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
-  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
-  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
-  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
-  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
-  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
-  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
+  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
+  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
+  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
+  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
+  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
+  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
+  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
   case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
   case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
   case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 94506f2fcd0..b8f3c10e618 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1871,7 +1871,7 @@ def : Pat <
 
 def : Pat <
   (fneg (fabs f32:$src)),
-  (S_OR_B32 $src, 0x80000000) // Set sign bit
+  (S_OR_B32 $src, (S_MOV_B32 0x80000000)) // Set sign bit
 >;
 
 // FIXME: Should use S_OR_B32
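The TableGen pattern above leans on the standard IEEE-754 sign-bit trick: OR-ing in 0x80000000 implements fneg(fabs(x)) in a single instruction. As a standalone C++ sketch of the same bit manipulation (illustrative only, not LLVM code):

#include <cstdint>
#include <cstring>
#include <iostream>

// fneg(fabs(x)) just forces the sign bit on, which is exactly what the
// (S_OR_B32 $src, (S_MOV_B32 0x80000000)) pattern emits.
float fnegFabs(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits)); // bit-cast float -> u32
  Bits |= 0x80000000u;                  // set the IEEE-754 sign bit
  std::memcpy(&X, &Bits, sizeof(Bits));
  return X;
}

int main() {
  std::cout << fnegFabs(3.5f) << ' ' << fnegFabs(-3.5f) << '\n'; // -3.5 -3.5
}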
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index c891af7a08f..e72b7d496ab 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -134,7 +134,6 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
 
   assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
 
-  const SIRegisterInfo &TRI = TII->getRegisterInfo();
   int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
   MachineOperand &Src0 = MI.getOperand(Src0Idx);
 
@@ -144,12 +143,6 @@
       TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
     return;
 
-  // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
-  // SGPR, we cannot commute the instruction, so we can't fold any literal
-  // constants.
-  if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
-    return;
-
   // Try to fold Src0
   if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
     unsigned Reg = Src0.getReg();
@@ -158,7 +151,8 @@
       MachineOperand &MovSrc = Def->getOperand(1);
       bool ConstantFolded = false;
 
-      if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+      if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
+                             isUInt<32>(MovSrc.getImm()))) {
         Src0.ChangeToImmediate(MovSrc.getImm());
         ConstantFolded = true;
       }
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index 2d5e9f4001d..e9d26a225e3 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -143,7 +143,7 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind
 ; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
 ; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
 ; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
-; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
+; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
 ; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
 ; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
 ; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index 898acc36099..1362fa7a908 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -82,8 +82,10 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in)
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600: -PV
 
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; FIXME: In this case two uses of the constant should be folded
+; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
 define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
   %fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -92,10 +94,11 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 }
 
 ; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
 define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
   %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
   %fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
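The operand swaps in these test updates (e.g. v[[HI]], v[[LO]] in ctlz.ll) all follow from one encoding rule: in the 32-bit VOP2 encoding only src0 may hold an SGPR or a literal, while src1 must be a VGPR, so commutative VALU ops get commuted to move a foldable constant into src0. A standalone sketch of that rule, with a hypothetical operand classification (not the in-tree API):

#include <iostream>

// Simplified model of the GCN VOP2 operand rule: src1 must be a VGPR,
// while src0 may also be an SGPR, an inline constant, or a 32-bit literal.
enum class OperandKind { VGPR, SGPR, InlineImm, Literal };

bool fitsVOP2Src0(OperandKind) { return true; }
bool fitsVOP2Src1(OperandKind K) { return K == OperandKind::VGPR; }

// A commutative op (v_or_b32, v_and_b32, ...) is still encodable in VOP2
// if swapping the operands makes both slots legal.
bool legalAsVOP2(OperandKind Src0, OperandKind Src1, bool Commutative) {
  if (fitsVOP2Src0(Src0) && fitsVOP2Src1(Src1))
    return true;
  return Commutative && fitsVOP2Src0(Src1) && fitsVOP2Src1(Src0);
}

int main() {
  // vgpr | literal: legal only after commuting the literal into src0.
  std::cout << legalAsVOP2(OperandKind::VGPR, OperandKind::Literal, true)
            << '\n'; // 1
}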
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index d21d66176a1..aa1f5b7362d 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -16,7 +16,7 @@ define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
 ; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
 ; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
 ; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
-; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[V0]], [[HI]]
+; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[HI]], [[V0]]
 ; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 ; GCN: s_endpgm
 define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
@@ -440,7 +440,7 @@ define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspa
 ; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
 ; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
 ; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
-; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[SHL]]
+; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[SHL]], [[CVT0]]
 ; GCN-DAG: buffer_store_dword [[PACKED]]
 ; GCN: s_endpgm
 define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
index 4c559f42d42..7b1373b13f3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
@@ -10,9 +10,7 @@ declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
 ; VI: s_load_dword [[SRC:s[0-9]+]]
 ; VI-DAG: v_rsq_f32_e32 [[RSQ:v[0-9]+]], [[SRC]]
 ; VI-DAG: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
-; TODO: this constant should be folded:
-; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff7fffff
-; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[K]]
+; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xff7fffff, [[MIN]]
 ; VI: buffer_store_dword [[RESULT]]
 define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
   %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
diff --git a/llvm/test/CodeGen/AMDGPU/or.ll b/llvm/test/CodeGen/AMDGPU/or.ll
index 9b90ff798ca..56f54cf7c5e 100644
--- a/llvm/test/CodeGen/AMDGPU/or.ll
+++ b/llvm/test/CodeGen/AMDGPU/or.ll
@@ -113,11 +113,9 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
 }
 
 ; FUNC-LABEL: {{^}}vector_or_i64_loadimm:
-; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
-; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
 ; SI: s_endpgm
 define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64, i64 addrspace(1)* %a, align 8
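The or.ll change is the visible payoff of the widened check in SIShrinkInstructions.cpp: an immediate that fits in 32 bits as either a signed or an unsigned value can now be folded as a literal, so 0xdf77987f and 0x146f no longer need separate s_mov_b32/s_movk_i32 instructions. A standalone sketch, re-implementing llvm::isInt/llvm::isUInt from llvm/Support/MathExtras.h purely for illustration:

#include <cstdint>
#include <iostream>

// Standalone stand-ins for llvm::isInt<N>/llvm::isUInt<N>.
template <unsigned N> bool isIntN(int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}
template <unsigned N> bool isUIntN(uint64_t V) {
  return V < (UINT64_C(1) << N);
}

// The old check was isUInt<32> alone, which rejected negative 32-bit
// immediates; the new check also accepts anything that fits as signed.
bool foldableAsLiteral(int64_t Imm) {
  return isIntN<32>(Imm) || isUIntN<32>(static_cast<uint64_t>(Imm));
}

int main() {
  std::cout << foldableAsLiteral(-17) << '\n';                         // 1
  std::cout << foldableAsLiteral(INT64_C(0xdf77987f)) << '\n';         // 1
  std::cout << foldableAsLiteral(INT64_C(0x11111111ffff8000)) << '\n'; // 0
}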
diff --git a/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll
index 47c7fbb6dd6..e422270fc4a 100644
--- a/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 ; SI-LABEL: {{^}}s_movk_i32_k0:
@@ -11,6 +11,7 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 4295032831)
   ret void
 }
 
@@ -24,6 +25,7 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 4295000063)
   ret void
 }
 
@@ -37,6 +39,7 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 274877939711)
   ret void
 }
 
@@ -50,6 +53,7 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 4295000064)
   ret void
 }
 
@@ -63,6 +67,7 @@ define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 4295098368)
   ret void
 }
 
@@ -77,6 +82,7 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 18374967954648334319)
   ret void
 }
 
@@ -90,6 +96,7 @@ define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 270582939713)
   ret void
 }
 
@@ -104,10 +111,10 @@ define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 70368744185856)
   ret void
 }
 
-
 ; SI-LABEL: {{^}}s_movk_i32_k8:
 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
 ; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
@@ -119,6 +126,7 @@ define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 1229782942255906816)
   ret void
 }
 
@@ -133,6 +141,7 @@ define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 1229782942255906817)
   ret void
 }
 
@@ -147,6 +156,7 @@ define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 1229782942255909000)
   ret void
 }
 
@@ -161,6 +171,7 @@ define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 1229782942255910911)
   ret void
 }
 
@@ -175,5 +186,6 @@ define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
   %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
   store i64 %or, i64 addrspace(1)* %out
+  call void asm sideeffect "; use $0", "s"(i64 1229782942255902721)
   ret void
 }
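The asm sideeffect calls added above keep each 64-bit constant live in SGPRs, so the s_movk_i32/s_mov_b32 checks still match even though single-use halves are now folded straight into the VALU or. The tests pin down how each constant splits into 32-bit halves; a quick standalone check of the k8 constant (illustrative arithmetic only):

#include <cstdint>
#include <cstdio>

int main() {
  // Constant from s_movk_i32_k8: 0x11111111ffff8000.
  uint64_t K = 0x11111111ffff8000ull;
  uint32_t Lo = static_cast<uint32_t>(K);       // 0xffff8000
  uint32_t Hi = static_cast<uint32_t>(K >> 32); // 0x11111111
  // s_movk_i32 takes a sign-extended 16-bit immediate, so the low half
  // qualifies (0xffff8000 == sext(0x8000)); the high half needs s_mov_b32.
  bool LoFitsMovk = static_cast<int32_t>(Lo) ==
                    static_cast<int32_t>(static_cast<int16_t>(Lo));
  std::printf("lo=0x%x movk=%d hi=0x%x\n", (unsigned)Lo, (int)LoFitsMovk,
              (unsigned)Hi); // lo=0xffff8000 movk=1 hi=0x11111111
}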
diff --git a/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll b/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll
index d5030adc89b..b3f000c8ccd 100644
--- a/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll
@@ -1,9 +1,8 @@
-; XFAIL: *
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK-LABEL: {{^}}main:
-; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0xbf4353f8
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
+; GCN-LABEL: {{^}}main:
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
 define amdgpu_vs void @main(float) {
 main_body:
   %1 = fmul float %0, 0x3FE86A7F00000000
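For reference, the two literals the new checks expect come straight from the IR: 0x3FE86A7F00000000 is an f64 bit pattern whose f32 truncation is 0x3f4353f8, and negating it merely flips the sign bit, giving 0xbf4353f8. A standalone verification (assuming IEEE-754 floats):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t DblBits = 0x3FE86A7F00000000ull; // f64 literal from the IR
  double D;
  std::memcpy(&D, &DblBits, sizeof(D));
  float F = static_cast<float>(D); // exact: the low mantissa bits are zero
  uint32_t FBits;
  std::memcpy(&FBits, &F, sizeof(FBits));
  std::printf("0x%08x 0x%08x\n", (unsigned)FBits,
              (unsigned)(FBits ^ 0x80000000u)); // 0x3f4353f8 0xbf4353f8
}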