Diffstat:
 -rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp             14
 -rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td            2
 -rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp    10
 -rw-r--r--  llvm/test/CodeGen/AMDGPU/ctlz.ll                    2
 -rw-r--r--  llvm/test/CodeGen/AMDGPU/fneg-fabs.ll              15
 -rw-r--r--  llvm/test/CodeGen/AMDGPU/half.ll                    4
 -rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll   4
 -rw-r--r--  llvm/test/CodeGen/AMDGPU/or.ll                      6
 -rw-r--r--  llvm/test/CodeGen/AMDGPU/s_movk_i32.ll             16
 -rw-r--r--  llvm/test/CodeGen/AMDGPU/si-literal-folding.ll      9

10 files changed, 43 insertions, 39 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c84847f2e0e..5e0d34d8498 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1852,13 +1852,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
- case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
- case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
- case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
- case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
- case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
- case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
- case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
+ case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
+ case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
+ case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
+ case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
+ case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
+ case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
+ case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
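The switch above now hands back the VOP3 (e64) forms for the commutable bitwise and min/max operations. The VOP2 (e32) encoding permits an SGPR or a literal constant only in src0, with src1 required to be a VGPR; when an SALU instruction such as s_or_b32 is moved to the VALU, either source may still be an SGPR, so starting from the e64 form keeps both operands encodable and lets SIShrinkInstructions recover the shorter e32 encoding afterwards where legal. A minimal sketch of that constraint, using a hypothetical helper that is not part of this patch:

    // Hypothetical illustration (not in the patch): VOP2 confines SGPRs and
    // literals to src0, so an op whose second source is an SGPR must start
    // out in the VOP3 (e64) encoding.
    static unsigned pickVALUOr(const MachineOperand &Src1,
                               const MachineRegisterInfo &MRI,
                               const SIRegisterInfo &TRI) {
      bool Src1IsVGPR =
          Src1.isReg() && TRI.hasVGPRs(MRI.getRegClass(Src1.getReg()));
      return Src1IsVGPR ? AMDGPU::V_OR_B32_e32 : AMDGPU::V_OR_B32_e64;
    }
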
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 94506f2fcd0..b8f3c10e618 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1871,7 +1871,7 @@ def : Pat <
def : Pat <
(fneg (fabs f32:$src)),
- (S_OR_B32 $src, 0x80000000) // Set sign bit
+ (S_OR_B32 $src, (S_MOV_B32 0x80000000)) // Set sign bit
>;
// FIXME: Should use S_OR_B32
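The pattern now routes the sign-bit mask through S_MOV_B32 instead of a bare literal operand, presumably because the constant must survive a move to the VALU: 0x80000000 is not an inline constant, and the VOP3 (e64) encoding these ops now start from cannot carry a 32-bit literal on this hardware generation, whereas an SGPR source stays legal in either encoding and can still be folded back into an e32 literal when the instruction is shrunk.
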
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index c891af7a08f..e72b7d496ab 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -134,7 +134,6 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
MachineOperand &Src0 = MI.getOperand(Src0Idx);
@@ -144,12 +143,6 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
return;
- // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
- // SGPR, we cannot commute the instruction, so we can't fold any literal
- // constants.
- if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
- return;
-
// Try to fold Src0
if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
unsigned Reg = Src0.getReg();
@@ -158,7 +151,8 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
- if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+ if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
+ isUInt<32>(MovSrc.getImm()))) {
Src0.ChangeToImmediate(MovSrc.getImm());
ConstantFolded = true;
}
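Two things change in foldImmediates here. First, the early bail-out for an SGPR in src0 is deleted; with the e64 opcodes selected above, operand legalization no longer depends on commuting inside this helper, so the fold can proceed (a rationale inferred from the patch, not stated in it). Second, the immediate test widens from isUInt<32> alone to accepting anything representable in 32 bits as either a signed or an unsigned value. A self-contained sketch of the widened check, where isInt/isUInt are the real llvm::MathExtras templates and the wrapper is purely illustrative:

    #include "llvm/Support/MathExtras.h"

    // Illustrative wrapper: a mov-source immediate is foldable when its
    // 64-bit value fits in 32 bits under either interpretation, so both
    // 0x80000000 (unsigned-only) and -17 (signed-only) now qualify.
    static bool isFoldableLiteral(int64_t Imm) {
      return llvm::isInt<32>(Imm) || llvm::isUInt<32>(Imm);
    }
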
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index 2d5e9f4001d..e9d26a225e3 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -143,7 +143,7 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind
; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
-; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
+; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
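The operand order in the v_or_b32_e32 check flips here (and in the half.ll updates below), apparently a side effect of the new select-e64-then-shrink path: shrinking back to e32 may commute the sources so that the operand that must occupy src0 does, and the FileCheck patterns track the commuted order.
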
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index 898acc36099..1362fa7a908 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -82,8 +82,10 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in)
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: -PV
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; FIXME: In this case two uses of the constant should be folded
+; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -92,10 +94,11 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index d21d66176a1..aa1f5b7362d 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -16,7 +16,7 @@ define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
-; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[V0]], [[HI]]
+; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[HI]], [[V0]]
; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
@@ -440,7 +440,7 @@ define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspa
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
-; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[SHL]]
+; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[SHL]], [[CVT0]]
; GCN-DAG: buffer_store_dword [[PACKED]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
index 4c559f42d42..7b1373b13f3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
@@ -10,9 +10,7 @@ declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
; VI: s_load_dword [[SRC:s[0-9]+]]
; VI-DAG: v_rsq_f32_e32 [[RSQ:v[0-9]+]], [[SRC]]
; VI-DAG: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
-; TODO: this constant should be folded:
-; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff7fffff
-; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[K]]
+; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xff7fffff, [[MIN]]
; VI: buffer_store_dword [[RESULT]]
define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
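With the widened immediate fold above, the 0xff7fffff clamp bound is now encoded directly as the literal src0 of v_max_f32_e32, so the separate v_mov_b32 materialization, and the TODO asking for exactly this fold, both disappear. The same effect accounts for the dropped s_mov_b32/s_movk_i32 checks in or.ll below.
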
diff --git a/llvm/test/CodeGen/AMDGPU/or.ll b/llvm/test/CodeGen/AMDGPU/or.ll
index 9b90ff798ca..56f54cf7c5e 100644
--- a/llvm/test/CodeGen/AMDGPU/or.ll
+++ b/llvm/test/CodeGen/AMDGPU/or.ll
@@ -113,11 +113,9 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
}
; FUNC-LABEL: {{^}}vector_or_i64_loadimm:
-; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
-; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
; SI: s_endpgm
define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
diff --git a/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll
index 47c7fbb6dd6..e422270fc4a 100644
--- a/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}s_movk_i32_k0:
@@ -11,6 +11,7 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 4295032831)
ret void
}
@@ -24,6 +25,7 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 4295000063)
ret void
}
@@ -37,6 +39,7 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 274877939711)
ret void
}
@@ -50,6 +53,7 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 4295000064)
ret void
}
@@ -63,6 +67,7 @@ define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 4295098368)
ret void
}
@@ -77,6 +82,7 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 18374967954648334319)
ret void
}
@@ -90,6 +96,7 @@ define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 270582939713)
ret void
}
@@ -104,10 +111,10 @@ define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 70368744185856)
ret void
}
-
; SI-LABEL: {{^}}s_movk_i32_k8:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
@@ -119,6 +126,7 @@ define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255906816)
ret void
}
@@ -133,6 +141,7 @@ define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255906817)
ret void
}
@@ -147,6 +156,7 @@ define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255909000)
ret void
}
@@ -161,6 +171,7 @@ define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255910911)
ret void
}
@@ -175,5 +186,6 @@ define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255902721)
ret void
}
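Every function in this test gains an inline-asm use of its constant with an "s" (SGPR) constraint. Now that the 32-bit halves of these literals can fold straight into the v_or_b32 operands, nothing else would force them into scalar registers, and the s_movk_i32/s_mov_b32 patterns the test exists to exercise would stop matching; the asm use keeps an SGPR copy of each constant live.
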
diff --git a/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll b/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll
index d5030adc89b..b3f000c8ccd 100644
--- a/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll
@@ -1,9 +1,8 @@
-; XFAIL: *
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK-LABEL: {{^}}main:
-; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0xbf4353f8
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; GCN-LABEL: {{^}}main:
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
define amdgpu_vs void @main(float) {
main_body:
%1 = fmul float %0, 0x3FE86A7F00000000