summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 51
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 18
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td | 11
-rw-r--r-- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 10
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/big_alu.ll | 12
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll | 33
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll | 33
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll (renamed from llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll) | 92
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll (renamed from llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll) | 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll (renamed from llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll) | 24
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll (renamed from llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll) | 44
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll (renamed from llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll) | 23
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll (renamed from llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll) | 59
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll (renamed from llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll) | 31
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll (renamed from llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll) | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll | 60
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll (renamed from llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll) | 6
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll | 36
-rw-r--r-- llvm/test/CodeGen/AMDGPU/pv.ll | 20
-rw-r--r-- llvm/test/CodeGen/AMDGPU/rcp-pattern.ll | 11
-rw-r--r-- llvm/test/CodeGen/AMDGPU/sgpr-copy.ll | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll | 18
-rw-r--r-- llvm/test/Transforms/InstCombine/amdgcn-intrinsics.ll (renamed from llvm/test/Transforms/InstCombine/r600-intrinsics.ll) | 20
26 files changed, 381 insertions, 254 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 84582e8b992..a9a7a2789a7 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -11,6 +11,10 @@
//
//===----------------------------------------------------------------------===//
+class AMDGPUReadPreloadRegisterIntrinsic<string name>
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<name>;
+
let TargetPrefix = "r600" in {
class R600ReadPreloadRegisterIntrinsic<string name>
@@ -41,15 +45,30 @@ def int_r600_rat_store_typed :
Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
GCCBuiltin<"__builtin_r600_rat_store_typed">;
+def int_r600_rsq : Intrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+>;
+
+def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
+ "__builtin_r600_read_workdim"
+>;
+
} // End TargetPrefix = "r600"
+// FIXME: These should be renamed/moved to r600
let TargetPrefix = "AMDGPU" in {
+def int_AMDGPU_rsq_clamped : Intrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+>;
-class AMDGPUReadPreloadRegisterIntrinsic<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
+def int_AMDGPU_ldexp : Intrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
+>;
+}
-def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
+let TargetPrefix = "amdgcn" in {
+
+def int_amdgcn_div_scale : GCCBuiltin<"__builtin_amdgcn_div_scale">,
// 1st parameter: Numerator
// 2nd parameter: Denominator
// 3rd parameter: Constant to select select between first and
@@ -58,43 +77,39 @@ def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
[IntrNoMem]>;
-def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
+def int_amdgcn_div_fmas : GCCBuiltin<"__builtin_amdgcn_div_fmas">,
Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
[IntrNoMem]>;
-def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
+def int_amdgcn_div_fixup : GCCBuiltin<"__builtin_amdgcn_div_fixup">,
Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
-def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
+def int_amdgcn_trig_preop : GCCBuiltin<"__builtin_amdgcn_trig_preop">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem]>;
-def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
+def int_amdgcn_rcp : GCCBuiltin<"__builtin_amdgcn_rcp">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
+def int_amdgcn_rsq : GCCBuiltin<"__builtin_amdgcn_rsq">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
+def int_amdgcn_rsq_clamped : GCCBuiltin<"__builtin_amdgcn_rsq_clamped">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
+def int_amdgcn_ldexp : GCCBuiltin<"__builtin_amdgcn_ldexp">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
-def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">,
+def int_amdgcn_class : GCCBuiltin<"__builtin_amdgcn_class">,
Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
- "__builtin_amdgpu_read_workdim">;
-
-} // End TargetPrefix = "AMDGPU"
+def int_amdgcn_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
+ "__builtin_amdgcn_read_workdim">;
-let TargetPrefix = "amdgcn" in {
-// SI only
def int_amdgcn_buffer_wbinvl1_sc :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
Intrinsic<[], [], []>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a211390c4b0..03d3a36b4bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -925,7 +925,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
- case Intrinsic::AMDGPU_div_scale: {
+ case Intrinsic::amdgcn_div_scale: {
// 3rd parameter required to be a constant.
const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
if (!Param)
@@ -947,28 +947,29 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Denominator, Numerator);
}
- case Intrinsic::AMDGPU_div_fmas:
+ case Intrinsic::amdgcn_div_fmas:
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(4));
- case Intrinsic::AMDGPU_div_fixup:
+ case Intrinsic::amdgcn_div_fixup:
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
- case Intrinsic::AMDGPU_trig_preop:
+ case Intrinsic::amdgcn_trig_preop:
return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
Op.getOperand(1), Op.getOperand(2));
- case Intrinsic::AMDGPU_rcp:
+ case Intrinsic::amdgcn_rcp:
return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
- case Intrinsic::AMDGPU_rsq:
+ case Intrinsic::amdgcn_rsq:
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+ case Intrinsic::amdgcn_rsq_clamped:
case Intrinsic::AMDGPU_rsq_clamped:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Type *Type = VT.getTypeForEVT(*DAG.getContext());
@@ -984,7 +985,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
}
- case Intrinsic::AMDGPU_ldexp:
+ case Intrinsic::amdgcn_ldexp:
+ case Intrinsic::AMDGPU_ldexp: // Legacy name
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
Op.getOperand(2));
@@ -1039,7 +1041,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2));
- case Intrinsic::AMDGPU_class:
+ case Intrinsic::amdgcn_class:
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
Op.getOperand(1), Op.getOperand(2));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
index d8701d13b55..ae564df966a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -61,10 +61,17 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ // Deprecated in favor of llvm.bitreverse
+ def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ // Deprecated in favor of llvm.amdgcn.s.barrier
def int_AMDGPU_barrier_local : Intrinsic<[], [], [IntrConvergent]>;
- def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>;
+ def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>;
+
+ // Deprecated in favor of llvm.amdgcn.read.workdim
+ def int_AMDGPU_read_workdim : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
}
// Legacy names for compatibility.
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index d6b6e197657..6c052b810b3 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -781,7 +781,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
- case Intrinsic::AMDGPU_read_workdim: {
+ case Intrinsic::r600_read_workdim:
+ case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
}
@@ -804,7 +805,12 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
- case Intrinsic::AMDGPU_rsq:
+
+ // FIXME: Should be renamed to r600 prefix
+ case Intrinsic::AMDGPU_rsq_clamped:
+ return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+
+ case Intrinsic::r600_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index efd8075dde4..7ba546a66dc 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1312,7 +1312,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::r600_read_local_size_z:
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
- case Intrinsic::AMDGPU_read_workdim:
+ case Intrinsic::amdgcn_read_workdim:
+ case AMDGPUIntrinsic::AMDGPU_read_workdim: // Legacy name.
// Really only 2 bits.
return lowerImplicitZextParam(DAG, Op, MVT::i8,
getImplicitParameterOffset(MFI, GRID_DIM));
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0f8aa5192ce..670ee6525de 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1614,7 +1614,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- case Intrinsic::AMDGPU_rcp: {
+ case Intrinsic::amdgcn_rcp: {
if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
const APFloat &ArgVal = C->getValueAPF();
APFloat Val(ArgVal.getSemantics(), 1.0);
diff --git a/llvm/test/CodeGen/AMDGPU/big_alu.ll b/llvm/test/CodeGen/AMDGPU/big_alu.ll
index 2671c5d102b..7c98645123e 100644
--- a/llvm/test/CodeGen/AMDGPU/big_alu.ll
+++ b/llvm/test/CodeGen/AMDGPU/big_alu.ll
@@ -100,7 +100,7 @@ IF137: ; preds = %main_body
%88 = insertelement <4 x float> %87, float %32, i32 2
%89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
%90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89)
- %91 = call float @llvm.AMDGPU.rsq.f32(float %90)
+ %91 = call float @llvm.AMDGPU.rsq.clamped.f32(float %90)
%92 = fmul float %30, %91
%93 = fmul float %31, %91
%94 = fmul float %32, %91
@@ -343,7 +343,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15
%325 = insertelement <4 x float> %324, float %318, i32 2
%326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3
%327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326)
- %328 = call float @llvm.AMDGPU.rsq.f32(float %327)
+ %328 = call float @llvm.AMDGPU.rsq.clamped.f32(float %327)
%329 = fmul float %314, %328
%330 = fmul float %316, %328
%331 = fmul float %318, %328
@@ -376,7 +376,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15
%358 = insertelement <4 x float> %357, float %45, i32 2
%359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3
%360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359)
- %361 = call float @llvm.AMDGPU.rsq.f32(float %360)
+ %361 = call float @llvm.AMDGPU.rsq.clamped.f32(float %360)
%362 = fmul float %45, %361
%363 = call float @fabs(float %362)
%364 = fmul float %176, 0x3FECCCCCC0000000
@@ -402,7 +402,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15
%384 = insertelement <4 x float> %383, float %45, i32 2
%385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3
%386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385)
- %387 = call float @llvm.AMDGPU.rsq.f32(float %386)
+ %387 = call float @llvm.AMDGPU.rsq.clamped.f32(float %386)
%388 = fmul float %45, %387
%389 = call float @fabs(float %388)
%390 = fmul float %176, 0x3FF51EB860000000
@@ -1040,7 +1040,7 @@ IF179: ; preds = %ENDIF175
%896 = insertelement <4 x float> %895, float %45, i32 2
%897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3
%898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897)
- %899 = call float @llvm.AMDGPU.rsq.f32(float %898)
+ %899 = call float @llvm.AMDGPU.rsq.clamped.f32(float %898)
%900 = fmul float %45, %899
%901 = call float @fabs(float %900)
%902 = fmul float %176, 0x3FECCCCCC0000000
@@ -1149,7 +1149,7 @@ ENDIF178: ; preds = %ENDIF175, %IF179
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #1
+declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll
deleted file mode 100644
index d2a655bf909..00000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
-declare double @llvm.sqrt.f64(double) nounwind readnone
-
-; FUNC-LABEL: {{^}}rcp_f64:
-; SI: v_rcp_f64_e32
-define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
- %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
- store double %rcp, double addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}rcp_pat_f64:
-; SI: v_rcp_f64_e32
-define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
- %rcp = fdiv double 1.0, %src
- store double %rcp, double addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
-; SI-UNSAFE: v_rsq_f64_e32
-; SI-SAFE-NOT: v_rsq_f64_e32
-; SI-SAFE: v_sqrt_f64
-; SI-SAFE: v_rcp_f64
-define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
- %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
- %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
- store double %rcp, double addrspace(1)* %out, align 8
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll
index eeff2536b23..622c11641aa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll
@@ -2,6 +2,8 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; FIXME: Uses of this should be moved to llvm.amdgcn.rsq.clamped, and
+; an r600 variant added.
declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll
deleted file mode 100644
index 36b72f14db1..00000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
-
-; FUNC-LABEL: {{^}}rsq_f32:
-; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-; EG: RECIPSQRT_IEEE
-define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
- %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
- store float %rsq, float addrspace(1)* %out, align 4
- ret void
-}
-
-; TODO: Really these should be constant folded
-; FUNC-LABEL: {{^}}rsq_f32_constant_4.0
-; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
-; EG: RECIPSQRT_IEEE
-define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
- %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
- store float %rsq, float addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}rsq_f32_constant_100.0
-; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
-; EG: RECIPSQRT_IEEE
-define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
- %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
- store float %rsq, float addrspace(1)* %out, align 4
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
index 80eb3b93f8e..37339215b0b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
-declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
+declare i1 @llvm.amdgcn.class.f32(float, i32) #1
+declare i1 @llvm.amdgcn.class.f64(double, i32) #1
declare i32 @llvm.r600.read.tidig.x() #1
declare float @llvm.fabs.f32(float) #1
declare double @llvm.fabs.f64(double) #1
@@ -15,7 +15,7 @@ declare double @llvm.fabs.f64(double) #1
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -31,7 +31,7 @@ define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
; SI: s_endpgm
define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
%a.fabs = call float @llvm.fabs.f32(float %a) #1
- %result = call i1 @llvm.AMDGPU.class.f32(float %a.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a.fabs, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -47,7 +47,7 @@ define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
; SI: s_endpgm
define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
%a.fneg = fsub float -0.0, %a
- %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -64,7 +64,7 @@ define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
%a.fabs = call float @llvm.fabs.f32(float %a) #1
%a.fneg.fabs = fsub float -0.0, %a.fabs
- %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg.fabs, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -77,7 +77,7 @@ define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b)
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -90,7 +90,7 @@ define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 {
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -105,7 +105,7 @@ define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 {
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1023) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -119,7 +119,7 @@ define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -138,7 +138,7 @@ define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -156,7 +156,7 @@ define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float 1.0, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -176,7 +176,7 @@ define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i3
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float 1024.0, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -191,7 +191,7 @@ define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i3
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -207,7 +207,7 @@ define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
; SI: s_endpgm
define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
%a.fabs = call double @llvm.fabs.f64(double %a) #1
- %result = call i1 @llvm.AMDGPU.class.f64(double %a.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a.fabs, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -223,7 +223,7 @@ define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
; SI: s_endpgm
define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
%a.fneg = fsub double -0.0, %a
- %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -240,7 +240,7 @@ define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
%a.fabs = call double @llvm.fabs.f64(double %a) #1
%a.fneg.fabs = fsub double -0.0, %a.fabs
- %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg.fabs, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -250,7 +250,7 @@ define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b)
; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}}
; SI: s_endpgm
define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 1) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -260,7 +260,7 @@ define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 {
; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}}
; SI: s_endpgm
define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 64) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -276,7 +276,7 @@ define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -296,7 +296,7 @@ define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load double, double addrspace(1)* %in
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -312,7 +312,7 @@ define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -327,7 +327,7 @@ define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i3
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double 1024.0, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -344,8 +344,8 @@ define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 3) #1
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -364,9 +364,9 @@ define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
- %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1
+ %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
%or.0 = or i1 %class0, %class1
%or.1 = or i1 %or.0, %class2
@@ -387,16 +387,16 @@ define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float ad
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
- %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %class3 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
- %class4 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 16) #1
- %class5 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 32) #1
- %class6 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
- %class7 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 128) #1
- %class8 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 256) #1
- %class9 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 512) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1
+ %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
+ %class3 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1
+ %class4 = call i1 @llvm.amdgcn.class.f32(float %a, i32 16) #1
+ %class5 = call i1 @llvm.amdgcn.class.f32(float %a, i32 32) #1
+ %class6 = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
+ %class7 = call i1 @llvm.amdgcn.class.f32(float %a, i32 128) #1
+ %class8 = call i1 @llvm.amdgcn.class.f32(float %a, i32 256) #1
+ %class9 = call i1 @llvm.amdgcn.class.f32(float %a, i32 512) #1
%or.0 = or i1 %class0, %class1
%or.1 = or i1 %or.0, %class2
%or.2 = or i1 %or.1, %class3
@@ -422,8 +422,8 @@ define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -442,8 +442,8 @@ define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -462,8 +462,8 @@ define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %b, i32 8) #1
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -477,7 +477,7 @@ define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 0) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -489,7 +489,7 @@ define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 0) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll
index 55ca9c7536e..f9b390eca0c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll
@@ -1,8 +1,8 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
-declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
-declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
+declare float @llvm.amdgcn.div.fixup.f32(float, float, float) nounwind readnone
+declare double @llvm.amdgcn.div.fixup.f64(double, double, double) nounwind readnone
; GCN-LABEL: {{^}}test_div_fixup_f32:
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
@@ -17,7 +17,7 @@ declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readn
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
+; Forwards the three float kernel arguments to llvm.amdgcn.div.fixup.f32 and stores the result to %out.
define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -25,7 +25,7 @@ define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, fl
; GCN-LABEL: {{^}}test_div_fixup_f64:
; GCN: v_div_fixup_f64
+; Double-precision variant: forwards the three double kernel arguments to llvm.amdgcn.div.fixup.f64.
define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
- %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
+ %result = call double @llvm.amdgcn.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
index 7dc094ed1b4..6bda39cf7c2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
@@ -4,8 +4,8 @@
; FIXME: Enable for VI.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
-declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
+declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) nounwind readnone
+declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind readnone
; GCN-LABEL: {{^}}test_div_fmas_f32:
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
@@ -21,7 +21,7 @@ declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind re
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
+; Baseline case: all four operands of llvm.amdgcn.div.fmas.f32 come directly from kernel arguments.
define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -35,7 +35,7 @@ define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, flo
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
+; Same as the baseline test, but the first float operand is the constant 1.0 instead of %a.
define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -49,7 +49,7 @@ define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a,
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
+; Same as the baseline test, but the second float operand is the constant 1.0 instead of %b.
define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -63,7 +63,7 @@ define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a,
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
+; Same as the baseline test, but the third float operand is the constant 1.0 instead of %c.
define void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -71,7 +71,7 @@ define void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, float %a,
; GCN-LABEL: {{^}}test_div_fmas_f64:
; GCN: v_div_fmas_f64
+; Double-precision variant: all operands of llvm.amdgcn.div.fmas.f64 come directly from kernel arguments.
define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
- %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
+ %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
@@ -81,7 +81,7 @@ define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b,
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; The i1 operand is produced by an integer compare (%i == 0) rather than passed in as an argument.
define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
%cmp = icmp eq i32 %i, 0
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -90,7 +90,7 @@ define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, f
; SI: s_mov_b64 vcc, 0
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; The i1 operand is the constant false; the check lines above expect vcc to be set to 0 before the v_div_fmas_f32.
define void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -99,7 +99,7 @@ define void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, f
; SI: s_mov_b64 vcc, -1
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; The i1 operand is the constant true; the check lines above expect vcc to be set to -1 before the v_div_fmas_f32.
define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -129,7 +129,7 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo
%cmp1 = icmp ne i32 %d, 0
%and = and i1 %cmp0, %cmp1
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone
store float %result, float addrspace(1)* %gep.out, align 4
ret void
}
@@ -172,7 +172,7 @@ bb:
exit:
%cond = phi i1 [false, %entry], [%cmp1, %bb]
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone
store float %result, float addrspace(1)* %gep.out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
index de830de039c..1b4104c3576 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -1,8 +1,8 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
-declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
+declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) nounwind readnone
+declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
; SI-LABEL: {{^}}test_div_scale_f32_1:
@@ -19,7 +19,7 @@ define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)*
%a = load float, float addrspace(1)* %gep.0, align 4
%b = load float, float addrspace(1)* %gep.1, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -39,7 +39,7 @@ define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)*
%a = load float, float addrspace(1)* %gep.0, align 4
%b = load float, float addrspace(1)* %gep.1, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -59,7 +59,7 @@ define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)
%a = load double, double addrspace(1)* %gep.0, align 8
%b = load double, double addrspace(1)* %gep.1, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -79,7 +79,7 @@ define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)
%a = load double, double addrspace(1)* %gep.0, align 8
%b = load double, double addrspace(1)* %gep.1, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -97,7 +97,7 @@ define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float add
%b = load float, float addrspace(1)* %gep, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -115,7 +115,7 @@ define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float add
%b = load float, float addrspace(1)* %gep, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -133,7 +133,7 @@ define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float add
%a = load float, float addrspace(1)* %gep, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -151,7 +151,7 @@ define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float add
%a = load float, float addrspace(1)* %gep, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -169,7 +169,7 @@ define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double a
%b = load double, double addrspace(1)* %gep, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -187,7 +187,7 @@ define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double a
%b = load double, double addrspace(1)* %gep, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -205,7 +205,7 @@ define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double a
%a = load double, double addrspace(1)* %gep, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -223,7 +223,7 @@ define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double a
%a = load double, double addrspace(1)* %gep, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -237,7 +237,7 @@ define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double a
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind {
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -251,7 +251,7 @@ define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a,
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind {
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -266,7 +266,7 @@ define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a,
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) nounwind {
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -281,7 +281,7 @@ define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind {
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -297,7 +297,7 @@ define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float a
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%a = load float, float addrspace(1)* %gep.0, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -313,7 +313,7 @@ define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float a
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%a = load float, float addrspace(1)* %gep.0, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -335,7 +335,7 @@ define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspa
%a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -357,7 +357,7 @@ define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspa
%b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll
index a59c0ce6d67..c48d52d150b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll
@@ -1,6 +1,9 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+declare float @llvm.amdgcn.ldexp.f32(float, i32) nounwind readnone
+declare double @llvm.amdgcn.ldexp.f64(double, i32) nounwind readnone
+
declare float @llvm.AMDGPU.ldexp.f32(float, i32) nounwind readnone
declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone
@@ -8,7 +11,7 @@ declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone
; SI: v_ldexp_f32
; SI: s_endpgm
+; Calls the renamed llvm.amdgcn.ldexp.f32 intrinsic with a float value and an i32 exponent from kernel arguments.
define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind {
- %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone
+ %result = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -17,6 +20,24 @@ define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind
; SI: v_ldexp_f64
; SI: s_endpgm
+; Calls the renamed llvm.amdgcn.ldexp.f64 intrinsic with a double value and an i32 exponent from kernel arguments.
define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind {
+ %result = call double @llvm.amdgcn.ldexp.f64(double %a, i32 %b) nounwind readnone
+ store double %result, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; Keeps coverage of the old llvm.AMDGPU.ldexp.f32 spelling, which is still declared at the top of this file;
+; the expected instruction is the same v_ldexp_f32 as for the llvm.amdgcn name.
+; SI-LABEL: {{^}}test_legacy_ldexp_f32:
+; SI: v_ldexp_f32
+; SI: s_endpgm
+define void @test_legacy_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind {
+ %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_legacy_ldexp_f64:
+; SI: v_ldexp_f64
+; SI: s_endpgm
+define void @test_legacy_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind {
%result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
index edd6e9a72f1..0988e43299c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -5,32 +5,26 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+declare float @llvm.amdgcn.rcp.f32(float) #0
+declare double @llvm.amdgcn.rcp.f64(double) #0
-declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
-declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+declare double @llvm.sqrt.f64(double) #0
+declare float @llvm.sqrt.f32(float) #0
-declare float @llvm.sqrt.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}rcp_f32:
; SI: v_rcp_f32_e32
-; EG: RECIP_IEEE
-define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind {
- %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone
+; Direct call of llvm.amdgcn.rcp.f32 on a kernel argument; attributes now come from the #0/#1 groups
+; defined at the end of the file instead of being spelled inline.
+define void @rcp_f32(float addrspace(1)* %out, float %src) #1 {
+ %rcp = call float @llvm.amdgcn.rcp.f32(float %src) #0
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
-; FIXME: Evergreen only ever does unsafe fp math.
; FUNC-LABEL: {{^}}rcp_pat_f32:
; SI-SAFE: v_rcp_f32_e32
; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32
-
-; EG: RECIP_IEEE
-
-define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
%rcp = fdiv float 1.0, %src
store float %rcp, float addrspace(1)* %out, align 4
ret void
@@ -40,11 +34,40 @@ define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
; SI-UNSAFE: v_rsq_f32_e32
; SI-SAFE: v_sqrt_f32_e32
; SI-SAFE: v_rcp_f32_e32
-
-; EG: RECIPSQRT_IEEE
-define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
- %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone
- %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone
+; Feeds llvm.sqrt.f32 into llvm.amdgcn.rcp.f32; the check lines above expect a single v_rsq_f32_e32
+; under the unsafe prefix and a separate v_sqrt_f32_e32 / v_rcp_f32_e32 pair under the safe prefix.
+define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
+ %sqrt = call float @llvm.sqrt.f32(float %src) #0
+ %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt) #0
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
+
+; Direct call of llvm.amdgcn.rcp.f64 on a double kernel argument.
+; FUNC-LABEL: {{^}}rcp_f64:
+; SI: v_rcp_f64_e32
+define void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
+ %rcp = call double @llvm.amdgcn.rcp.f64(double %src) #0
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; Uses a plain `fdiv double 1.0, %src` instead of the intrinsic; the check below only requires that a
+; v_rcp_f64_e32 appears somewhere in the function for every RUN configuration.
+; FUNC-LABEL: {{^}}rcp_pat_f64:
+; SI: v_rcp_f64_e32
+define void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
+ %rcp = fdiv double 1.0, %src
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; Feeds llvm.sqrt.f64 into llvm.amdgcn.rcp.f64. Under the unsafe prefix a single v_rsq_f64_e32 is expected;
+; under the safe prefix no v_rsq_f64_e32 may appear and v_sqrt_f64 followed by v_rcp_f64 is expected.
+; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
+; SI-UNSAFE: v_rsq_f64_e32
+; SI-SAFE-NOT: v_rsq_f64_e32
+; SI-SAFE: v_sqrt_f64
+; SI-SAFE: v_rcp_f64
+define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
+ %sqrt = call double @llvm.sqrt.f64(double %src) #0
+ %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt) #0
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll
index 2e299e30b8c..76a5757e4c2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll
@@ -1,23 +1,19 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}read_workdim:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV * [[VAL]], KC0[2].Z
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA %s
+; GCN-LABEL: {{^}}read_workdim:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+; Reads the work dimension through the renamed llvm.amdgcn.read.workdim intrinsic and stores it to %out.
define void @read_workdim(i32 addrspace(1)* %out) {
entry:
- %0 = call i32 @llvm.AMDGPU.read.workdim() #0
+ %0 = call i32 @llvm.amdgcn.read.workdim() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}read_workdim_known_bits:
+; GCN-LABEL: {{^}}read_workdim_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
; GCN-NOT: 0xff
@@ -25,13 +21,26 @@ entry:
; GCN: buffer_store_dword [[VVAL]]
define void @read_workdim_known_bits(i32 addrspace(1)* %out) {
entry:
- %dim = call i32 @llvm.AMDGPU.read.workdim() #0
+ %dim = call i32 @llvm.amdgcn.read.workdim() #0
%shl = shl i32 %dim, 24
%shr = lshr i32 %shl, 24
store i32 %shr, i32 addrspace(1)* %out
ret void
}
+; Keeps coverage of the old llvm.AMDGPU.read.workdim spelling (still declared below); the expected
+; instruction sequence is the same as for read_workdim above.
+; GCN-LABEL: {{^}}legacy_read_workdim:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+define void @legacy_read_workdim(i32 addrspace(1)* %out) {
+entry:
+ %dim = call i32 @llvm.AMDGPU.read.workdim() #0
+ store i32 %dim, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.read.workdim() #0
declare i32 @llvm.AMDGPU.read.workdim() #0
-attributes #0 = { readnone }
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll
index 67f1d22c717..be7398cbb85 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
-declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
+declare double @llvm.amdgcn.rsq.clamped.f64(double) nounwind readnone
; FUNC-LABEL: {{^}}rsq_clamped_f64:
; SI: v_rsq_clamp_f64_e32
@@ -17,7 +17,7 @@ declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
+; Calls the renamed llvm.amdgcn.rsq.clamped.f64 intrinsic on a double kernel argument and stores the result.
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
- %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
+ %rsq_clamped = call double @llvm.amdgcn.rsq.clamped.f64(double %src) nounwind readnone
store double %rsq_clamped, double addrspace(1)* %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
new file mode 100644
index 00000000000..4c5489a4632
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.amdgcn.rsq.f32(float) #0
+declare double @llvm.amdgcn.rsq.f64(double) #0
+
+; llvm.amdgcn.rsq.f32 applied to the function argument %src: expects one
+; v_rsq_f32_e32 whose source operand is an SGPR.
+; FUNC-LABEL: {{^}}rsq_f32:
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define void @rsq_f32(float addrspace(1)* %out, float %src) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float %src) #0
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; TODO: Really these should be constant folded
+; The constant 4.0 is expected to appear directly as the v_rsq_f32_e32
+; source operand. The label pattern ends in ':' so it matches the whole
+; symbol name, like the other labels in this file.
+; FUNC-LABEL: {{^}}rsq_f32_constant_4.0:
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
+define void @rsq_f32_constant_4.0(float addrspace(1)* %out) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float 4.0) #0
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; The constant 100.0 is checked as the 32-bit literal operand 0x42c80000.
+; The label pattern ends in ':' so it matches the whole symbol name, like
+; the other labels in this file.
+; FUNC-LABEL: {{^}}rsq_f32_constant_100.0:
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
+define void @rsq_f32_constant_100.0(float addrspace(1)* %out) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float 100.0) #0
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; llvm.amdgcn.rsq.f64 applied to the function argument %src: expects one
+; v_rsq_f64_e32 reading an SGPR pair and writing a VGPR pair. The result is
+; stored with the natural 8-byte alignment of double.
+; FUNC-LABEL: {{^}}rsq_f64:
+; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @rsq_f64(double addrspace(1)* %out, double %src) #1 {
+ %rsq = call double @llvm.amdgcn.rsq.f64(double %src) #0
+ store double %rsq, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; TODO: Really these should be constant folded
+; The constant 4.0 is expected to appear directly as the v_rsq_f64_e32
+; source operand. The label pattern ends in ':' so it matches the whole
+; symbol name, and the double result is stored with 8-byte alignment.
+; FUNC-LABEL: {{^}}rsq_f64_constant_4.0:
+; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, 4.0
+define void @rsq_f64_constant_4.0(double addrspace(1)* %out) #1 {
+ %rsq = call double @llvm.amdgcn.rsq.f64(double 4.0) #0
+ store double %rsq, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; The constant 100.0 is expected to be materialized with two s_mov_b32
+; (0x40590000 and 0, in either order) before v_rsq_f64_e32 reads the SGPR
+; pair. The label pattern ends in ':' so it matches the whole symbol name,
+; and the double result is stored with 8-byte alignment.
+; FUNC-LABEL: {{^}}rsq_f64_constant_100.0:
+; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0x40590000
+; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0{{$}}
+; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @rsq_f64_constant_100.0(double addrspace(1)* %out) #1 {
+ %rsq = call double @llvm.amdgcn.rsq.f64(double 100.0) #0
+ store double %rsq, double addrspace(1)* %out, align 8
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
index 6b546a7e17c..7757e411553 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
+declare double @llvm.amdgcn.trig.preop.f64(double, i32) nounwind readnone
; SI-LABEL: {{^}}test_trig_preop_f64:
; SI-DAG: buffer_load_dword [[SEG:v[0-9]+]]
@@ -12,7 +12,7 @@ declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load double, double addrspace(1)* %aptr, align 8
%b = load i32, i32 addrspace(1)* %bptr, align 4
- %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
@@ -24,7 +24,7 @@ define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)*
; SI: s_endpgm
define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
%a = load double, double addrspace(1)* %aptr, align 8
- %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll
new file mode 100644
index 00000000000..2f5947395c4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s
+
+; llvm.r600.read.workdim: the value written by MEM_RAT_CACHELESS STORE_RAW
+; is expected to come from a MOV of KC0[2].Z into the same T register.
+; EG-LABEL: {{^}}read_workdim:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].Z
+define void @read_workdim(i32 addrspace(1)* %out) {
+entry:
+ %dim = call i32 @llvm.r600.read.workdim() #0
+ store i32 %dim, i32 addrspace(1)* %out
+ ret void
+}
+
+; Only the function label is checked here; there are no EG instruction
+; checks for this function.
+; NOTE(review): the shl/lshr-by-24 pair keeps just the low 8 bits of %dim;
+; presumably the intent is to verify that the mask is folded away using
+; known bits of the intrinsic result -- confirm and add an EG check if so.
+; EG-LABEL: {{^}}read_workdim_known_bits:
+define void @read_workdim_known_bits(i32 addrspace(1)* %out) {
+entry:
+ %dim = call i32 @llvm.r600.read.workdim() #0
+ %shl = shl i32 %dim, 24
+ %shr = lshr i32 %shl, 24
+ store i32 %shr, i32 addrspace(1)* %out
+ ret void
+}
+
+; Same expectations as for llvm.r600.read.workdim, but the call uses the
+; legacy llvm.AMDGPU.read.workdim intrinsic name.
+; EG-LABEL: {{^}}legacy_read_workdim:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].Z
+define void @legacy_read_workdim(i32 addrspace(1)* %out) {
+entry:
+ %dim = call i32 @llvm.AMDGPU.read.workdim() #0
+ store i32 %dim, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.workdim() #0
+declare i32 @llvm.AMDGPU.read.workdim() #0
+
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/pv.ll b/llvm/test/CodeGen/AMDGPU/pv.ll
index 9a57dd19765..fda812af5e2 100644
--- a/llvm/test/CodeGen/AMDGPU/pv.ll
+++ b/llvm/test/CodeGen/AMDGPU/pv.ll
@@ -103,7 +103,7 @@ main_body:
%95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
%96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95)
%97 = call float @fabs(float %96)
- %98 = call float @llvm.AMDGPU.rsq.f32(float %97)
+ %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97)
%99 = fmul float %4, %98
%100 = fmul float %5, %98
%101 = fmul float %6, %98
@@ -119,10 +119,10 @@ main_body:
%111 = extractelement <4 x float> %110, i32 2
%112 = fmul float %111, %10
%113 = fadd float %112, %22
- %114 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00)
- %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
- %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00)
- %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+ %114 = call float @llvm.AMDGPU.clamp.f32(float %105, float 0.000000e+00, float 1.000000e+00)
+ %115 = call float @llvm.AMDGPU.clamp.f32(float %109, float 0.000000e+00, float 1.000000e+00)
+ %116 = call float @llvm.AMDGPU.clamp.f32(float %113, float 0.000000e+00, float 1.000000e+00)
+ %117 = call float @llvm.AMDGPU.clamp.f32(float %15, float 0.000000e+00, float 1.000000e+00)
%118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%119 = extractelement <4 x float> %118, i32 0
%120 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
@@ -202,9 +202,9 @@ main_body:
%194 = fadd float %193, %188
%195 = fmul float %181, %174
%196 = fadd float %195, %190
- %197 = call float @llvm.AMDIL.clamp.(float %192, float 0.000000e+00, float 1.000000e+00)
- %198 = call float @llvm.AMDIL.clamp.(float %194, float 0.000000e+00, float 1.000000e+00)
- %199 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00)
+ %197 = call float @llvm.AMDGPU.clamp.f32(float %192, float 0.000000e+00, float 1.000000e+00)
+ %198 = call float @llvm.AMDGPU.clamp.f32(float %194, float 0.000000e+00, float 1.000000e+00)
+ %199 = call float @llvm.AMDGPU.clamp.f32(float %196, float 0.000000e+00, float 1.000000e+00)
%200 = insertelement <4 x float> undef, float %75, i32 0
%201 = insertelement <4 x float> %200, float %79, i32 1
%202 = insertelement <4 x float> %201, float %83, i32 2
@@ -225,10 +225,10 @@ declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
declare float @fabs(float) #2
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #1
+declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
-declare float @llvm.AMDIL.clamp.(float, float, float) #1
+declare float @llvm.AMDGPU.clamp.f32(float, float, float) #1
; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3
diff --git a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
new file mode 100644
index 00000000000..b1d42206254
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FIXME: Evergreen only ever does unsafe fp math.
+; Checks that `fdiv float 1.0, %src` is matched to RECIP_IEEE. Only the EG
+; prefix (the cayman RUN line) carries that check; the EG-SAFE prefix used by
+; the cypress RUN line has no check lines in this file, so that run only
+; matches the FUNC label.
+; FUNC-LABEL: {{^}}rcp_pat_f32:
+; EG: RECIP_IEEE
+define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+ %rcp = fdiv float 1.0, %src
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
index b849c4038bc..3e70a84d67e 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -71,7 +71,7 @@ main_body:
%55 = fadd float %54, %53
%56 = fmul float %45, %45
%57 = fadd float %55, %56
- %58 = call float @llvm.AMDGPU.rsq.f32(float %57)
+ %58 = call float @llvm.amdgcn.rsq.f32(float %57)
%59 = fmul float %43, %58
%60 = fmul float %44, %58
%61 = fmul float %45, %58
@@ -213,7 +213,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #3
+declare float @llvm.amdgcn.rsq.f32(float) #3
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #3
diff --git a/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll
index d7b35fc631e..6c94bbc760c 100644
--- a/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -215,7 +215,7 @@ main_body:
%198 = fadd float %197, %196
%199 = fmul float %97, %97
%200 = fadd float %198, %199
- %201 = call float @llvm.AMDGPU.rsq.f32(float %200)
+ %201 = call float @llvm.amdgcn.rsq.f32(float %200)
%202 = fmul float %95, %201
%203 = fmul float %96, %201
%204 = fmul float %202, %29
@@ -396,7 +396,7 @@ IF67: ; preds = %LOOP65
%355 = fadd float %354, %353
%356 = fmul float %352, %352
%357 = fadd float %355, %356
- %358 = call float @llvm.AMDGPU.rsq.f32(float %357)
+ %358 = call float @llvm.amdgcn.rsq.f32(float %357)
%359 = fmul float %350, %358
%360 = fmul float %351, %358
%361 = fmul float %352, %358
@@ -524,7 +524,7 @@ IF67: ; preds = %LOOP65
%483 = fadd float %482, %481
%484 = fmul float %109, %109
%485 = fadd float %483, %484
- %486 = call float @llvm.AMDGPU.rsq.f32(float %485)
+ %486 = call float @llvm.amdgcn.rsq.f32(float %485)
%487 = fmul float %107, %486
%488 = fmul float %108, %486
%489 = fmul float %109, %486
@@ -553,7 +553,7 @@ IF67: ; preds = %LOOP65
%512 = fadd float %511, %510
%513 = fmul float %97, %97
%514 = fadd float %512, %513
- %515 = call float @llvm.AMDGPU.rsq.f32(float %514)
+ %515 = call float @llvm.amdgcn.rsq.f32(float %514)
%516 = fmul float %95, %515
%517 = fmul float %96, %515
%518 = fmul float %97, %515
@@ -670,7 +670,7 @@ declare i32 @llvm.SI.tid() #2
declare float @ceil(float) #3
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #2
+declare float @llvm.amdgcn.rsq.f32(float) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1
@@ -899,7 +899,7 @@ main_body:
%212 = fadd float %211, %210
%213 = fmul float %209, %209
%214 = fadd float %212, %213
- %215 = call float @llvm.AMDGPU.rsq.f32(float %214)
+ %215 = call float @llvm.amdgcn.rsq.f32(float %214)
%216 = fmul float %205, %215
%217 = fmul float %207, %215
%218 = fmul float %209, %215
@@ -1135,7 +1135,7 @@ IF189: ; preds = %LOOP
%434 = fsub float -0.000000e+00, %433
%435 = fadd float 0x3FF00068E0000000, %434
%436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00)
- %437 = call float @llvm.AMDGPU.rsq.f32(float %436)
+ %437 = call float @llvm.amdgcn.rsq.f32(float %436)
%438 = fmul float %437, %436
%439 = fsub float -0.000000e+00, %436
%440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00)
@@ -1159,7 +1159,7 @@ IF189: ; preds = %LOOP
%458 = fadd float %457, %456
%459 = fmul float %455, %455
%460 = fadd float %458, %459
- %461 = call float @llvm.AMDGPU.rsq.f32(float %460)
+ %461 = call float @llvm.amdgcn.rsq.f32(float %460)
%462 = fmul float %451, %461
%463 = fmul float %453, %461
%464 = fmul float %455, %461
@@ -1269,7 +1269,7 @@ ENDIF197: ; preds = %IF189, %IF198
%559 = fadd float %558, %557
%560 = fmul float %556, %556
%561 = fadd float %559, %560
- %562 = call float @llvm.AMDGPU.rsq.f32(float %561)
+ %562 = call float @llvm.amdgcn.rsq.f32(float %561)
%563 = fmul float %562, %561
%564 = fsub float -0.000000e+00, %561
%565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00)
diff --git a/llvm/test/Transforms/InstCombine/r600-intrinsics.ll b/llvm/test/Transforms/InstCombine/amdgcn-intrinsics.ll
index 1db6b0d28bf..387ad31f253 100644
--- a/llvm/test/Transforms/InstCombine/r600-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/amdgcn-intrinsics.ll
@@ -1,47 +1,47 @@
; RUN: opt -instcombine -S < %s | FileCheck %s
-declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
-declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
+declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
; CHECK-LABEL: @test_constant_fold_rcp_f32_1
; CHECK-NEXT: ret float 1.000000e+00
define float @test_constant_fold_rcp_f32_1() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone
+ %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
ret float %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f64_1
; CHECK-NEXT: ret double 1.000000e+00
define double @test_constant_fold_rcp_f64_1() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone
+ %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
ret double %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f32_half
; CHECK-NEXT: ret float 2.000000e+00
define float @test_constant_fold_rcp_f32_half() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone
+ %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
ret float %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f64_half
; CHECK-NEXT: ret double 2.000000e+00
define double @test_constant_fold_rcp_f64_half() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone
+ %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
ret double %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f32_43
-; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01)
+; CHECK-NEXT: call float @llvm.amdgcn.rcp.f32(float 4.300000e+01)
define float @test_constant_fold_rcp_f32_43() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone
+ %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
ret float %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f64_43
-; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01)
+; CHECK-NEXT: call double @llvm.amdgcn.rcp.f64(double 4.300000e+01)
define double @test_constant_fold_rcp_f64_43() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone
+ %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
ret double %val
}
OpenPOWER on IntegriCloud