diff options
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 52 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/fceil.ll | 84 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/ffloor.ll | 84 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/ftrunc.ll | 84 | 
7 files changed, 324 insertions, 2 deletions
| diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index babd0e2eb1e..9629b70b470 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -45,6 +45,8 @@ static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,  AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :    TargetLowering(TM, new TargetLoweringObjectFileELF()) { +  Subtarget = &TM.getSubtarget<AMDGPUSubtarget>(); +    // Initialize target lowering borrowed from AMDIL    InitAMDILLowering(); diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h index b53ba0a542f..7fa25905d01 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.h +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h @@ -21,9 +21,13 @@  namespace llvm {  class AMDGPUMachineFunction; +class AMDGPUSubtarget;  class MachineRegisterInfo;  class AMDGPUTargetLowering : public TargetLowering { +protected: +  const AMDGPUSubtarget *Subtarget; +  private:    void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,                               SmallVectorImpl<SDValue> &Args, diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index 1a49ccb2268..004957ce189 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -14,6 +14,7 @@  #include "SIISelLowering.h"  #include "AMDGPU.h" +#include "AMDGPUSubtarget.h"  #include "AMDILIntrinsicInfo.h"  #include "SIInstrInfo.h"  #include "SIMachineFunctionInfo.h" @@ -30,7 +31,6 @@ using namespace llvm;  SITargetLowering::SITargetLowering(TargetMachine &TM) :      AMDGPUTargetLowering(TM) { -    addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);    addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass); @@ -175,8 +175,20 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :      }    } -  setTargetDAGCombine(ISD::SELECT_CC); +  for (int I = MVT::v1f64; I <= 
MVT::v8f64; ++I) { +    MVT::SimpleValueType VT = static_cast<MVT::SimpleValueType>(I); +    setOperationAction(ISD::FTRUNC, VT, Expand); +    setOperationAction(ISD::FCEIL, VT, Expand); +    setOperationAction(ISD::FFLOOR, VT, Expand); +  } +  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { +    setOperationAction(ISD::FTRUNC, MVT::f64, Legal); +    setOperationAction(ISD::FCEIL, MVT::f64, Legal); +    setOperationAction(ISD::FFLOOR, MVT::f64, Legal); +  } + +  setTargetDAGCombine(ISD::SELECT_CC);    setTargetDAGCombine(ISD::SETCC);    setSchedulingPreference(Sched::RegPressure); diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 9da05c34d7b..b45da5cb9ee 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -29,6 +29,9 @@ def SendMsgImm : Operand<i32> {  def isSI : Predicate<"Subtarget.getGeneration() "                        ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; +def isCI : Predicate<"Subtarget.getGeneration() " +                      ">= AMDGPUSubtarget::SEA_ISLANDS">; +  def WAIT_FLAG : InstFlag<"printWaitFlag">;  let Predicates = [isSI] in { @@ -2104,6 +2107,55 @@ def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;  def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;  def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>; +let Predicates = [isCI] in { + +// Sea Islands new arithmetic instructions +let neverHasSideEffects = 1 in { +defm V_TRUNC_F64 : VOP1_64 <0x00000017, "V_TRUNC_F64", +  [(set f64:$dst, (ftrunc f64:$src0))] +>; +defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64", +  [(set f64:$dst, (fceil f64:$src0))] +>; +defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64", +  [(set f64:$dst, (ffloor f64:$src0))] +>; + +defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64", []>; + +def V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>; +def V_MQSAD_U16_U8 : VOP3_32 <0x000000172, 
"V_MQSAD_U16_U8", []>; +def V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>; +def V_MAD_U64_U32 : VOP3_64 <0x00000176, "V_MAD_U64_U32", []>; + +// XXX - Does this set VCC? +def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>; +} // End neverHasSideEffects = 1 + +// Remaining instructions: +// FLAT_* +// S_CBRANCH_CDBGUSER +// S_CBRANCH_CDBGSYS +// S_CBRANCH_CDBGSYS_OR_USER +// S_CBRANCH_CDBGSYS_AND_USER +// S_DCACHE_INV_VOL +// V_EXP_LEGACY_F32 +// V_LOG_LEGACY_F32 +// DS_NOP +// DS_GWS_SEMA_RELEASE_ALL +// DS_WRAP_RTN_B32 +// DS_CNDXCHG32_RTN_B64 +// DS_WRITE_B96 +// DS_WRITE_B128 +// DS_CONDXCHG32_RTN_B128 +// DS_READ_B96 +// DS_READ_B128 +// BUFFER_LOAD_DWORDX3 +// BUFFER_STORE_DWORDX3 + +} // End Predicates = [isCI] + +  /********** ====================== **********/  /**********   Indirect adressing   **********/  /********** ====================== **********/ diff --git a/llvm/test/CodeGen/R600/fceil.ll b/llvm/test/CodeGen/R600/fceil.ll new file mode 100644 index 00000000000..b8b945f46ff --- /dev/null +++ b/llvm/test/CodeGen/R600/fceil.ll @@ -0,0 +1,84 @@ +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s + +declare double @llvm.ceil.f64(double) nounwind readnone +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone + +; CI-LABEL: @fceil_f64: +; CI: V_CEIL_F64_e32 +define void @fceil_f64(double addrspace(1)* %out, double %x) { +  %y = call double @llvm.ceil.f64(double %x) nounwind readnone +  store double %y, double addrspace(1)* %out +  ret void +} + +; CI-LABEL: @fceil_v2f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { +  %y = call <2 x 
double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone +  store <2 x double> %y, <2 x double> addrspace(1)* %out +  ret void +} + +; FIXME-CI-LABEL: @fceil_v3f64: +; FIXME-CI: V_CEIL_F64_e32 +; FIXME-CI: V_CEIL_F64_e32 +; FIXME-CI: V_CEIL_F64_e32 +; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +;   %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone +;   store <3 x double> %y, <3 x double> addrspace(1)* %out +;   ret void +; } + +; CI-LABEL: @fceil_v4f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { +  %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone +  store <4 x double> %y, <4 x double> addrspace(1)* %out +  ret void +} + +; CI-LABEL: @fceil_v8f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { +  %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone +  store <8 x double> %y, <8 x double> addrspace(1)* %out +  ret void +} + +; CI-LABEL: @fceil_v16f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { +  %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone +  store <16 x double> %y, <16 x double> addrspace(1)* %out +  ret void +} diff --git a/llvm/test/CodeGen/R600/ffloor.ll b/llvm/test/CodeGen/R600/ffloor.ll new file mode 100644 index 
00000000000..51d2b896150 --- /dev/null +++ b/llvm/test/CodeGen/R600/ffloor.ll @@ -0,0 +1,84 @@ +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s + +declare double @llvm.floor.f64(double) nounwind readnone +declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone + +; CI-LABEL: @ffloor_f64: +; CI: V_FLOOR_F64_e32 +define void @ffloor_f64(double addrspace(1)* %out, double %x) { +  %y = call double @llvm.floor.f64(double %x) nounwind readnone +  store double %y, double addrspace(1)* %out +  ret void +} + +; CI-LABEL: @ffloor_v2f64: +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { +  %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone +  store <2 x double> %y, <2 x double> addrspace(1)* %out +  ret void +} + +; FIXME-CI-LABEL: @ffloor_v3f64: +; FIXME-CI: V_FLOOR_F64_e32 +; FIXME-CI: V_FLOOR_F64_e32 +; FIXME-CI: V_FLOOR_F64_e32 +; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +;   %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone +;   store <3 x double> %y, <3 x double> addrspace(1)* %out +;   ret void +; } + +; CI-LABEL: @ffloor_v4f64: +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { +  %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone +  store <4 x double> %y, <4 x double> addrspace(1)* %out +  ret void +} + +; CI-LABEL: @ffloor_v8f64: +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: 
V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { +  %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone +  store <8 x double> %y, <8 x double> addrspace(1)* %out +  ret void +} + +; CI-LABEL: @ffloor_v16f64: +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +; CI: V_FLOOR_F64_e32 +define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { +  %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone +  store <16 x double> %y, <16 x double> addrspace(1)* %out +  ret void +} diff --git a/llvm/test/CodeGen/R600/ftrunc.ll b/llvm/test/CodeGen/R600/ftrunc.ll new file mode 100644 index 00000000000..6b235ffbd98 --- /dev/null +++ b/llvm/test/CodeGen/R600/ftrunc.ll @@ -0,0 +1,84 @@ +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s + +declare double @llvm.trunc.f64(double) nounwind readnone +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone + +; CI-LABEL: @ftrunc_f64: +; CI: V_TRUNC_F64_e32 +define void @ftrunc_f64(double addrspace(1)* %out, double %x) { +  %y = call double @llvm.trunc.f64(double %x) nounwind readnone +  store double %y, double addrspace(1)* %out +  ret void +} + +; CI-LABEL: @ftrunc_v2f64: +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +define void 
@ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { +  %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone +  store <2 x double> %y, <2 x double> addrspace(1)* %out +  ret void +} + +; FIXME-CI-LABEL: @ftrunc_v3f64: +; FIXME-CI: V_TRUNC_F64_e32 +; FIXME-CI: V_TRUNC_F64_e32 +; FIXME-CI: V_TRUNC_F64_e32 +; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +;   %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone +;   store <3 x double> %y, <3 x double> addrspace(1)* %out +;   ret void +; } + +; CI-LABEL: @ftrunc_v4f64: +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { +  %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone +  store <4 x double> %y, <4 x double> addrspace(1)* %out +  ret void +} + +; CI-LABEL: @ftrunc_v8f64: +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { +  %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone +  store <8 x double> %y, <8 x double> addrspace(1)* %out +  ret void +} + +; CI-LABEL: @ftrunc_v16f64: +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +; CI: V_TRUNC_F64_e32 +define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { +  %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone +  store <16 x double> %y, <16 x double> addrspace(1)* 
%out +  ret void +} | 

