summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp  34
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fceil64.ll  4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/sdivrem64.ll  78
-rw-r--r--  llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll  10
-rw-r--r--  llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll  7
5 files changed, 83 insertions, 50 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 75afce0534b..0d325a32347 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -399,6 +399,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::BITCAST);
+
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -2547,6 +2549,38 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
switch(N->getOpcode()) {
default:
break;
+ case ISD::BITCAST: {
+ EVT DestVT = N->getValueType(0);
+ if (DestVT.getSizeInBits() != 64 && !DestVT.isVector())
+ break;
+
+ // Fold bitcasts of constants.
+ //
+ // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k)
+ // TODO: Generalize and move to DAGCombiner
+ SDValue Src = N->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
+ assert(Src.getValueType() == MVT::i64);
+ SDLoc SL(N);
+ uint64_t CVal = C->getZExtValue();
+ return DAG.getNode(ISD::BUILD_VECTOR, SL, DestVT,
+ DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
+ DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+ }
+
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
+ const APInt &Val = C->getValueAPF().bitcastToAPInt();
+ SDLoc SL(N);
+ uint64_t CVal = Val.getZExtValue();
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
+ DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+
+ return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);
+ }
+
+ break;
+ }
case ISD::SHL: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
break;
diff --git a/llvm/test/CodeGen/AMDGPU/fceil64.ll b/llvm/test/CodeGen/AMDGPU/fceil64.ll
index eb90b75b208..579cbf435e7 100644
--- a/llvm/test/CodeGen/AMDGPU/fceil64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fceil64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
@@ -25,7 +25,7 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
; SI-DAG: cndmask_b32
; SI-DAG: v_cmp_lt_f64
; SI-DAG: v_cmp_lg_f64
-; SI: s_and_b64
+; SI-DAG: s_and_b64
; SI: v_cndmask_b32
; SI: v_cndmask_b32
; SI: v_add_f64
diff --git a/llvm/test/CodeGen/AMDGPU/sdivrem64.ll b/llvm/test/CodeGen/AMDGPU/sdivrem64.ll
index a9b2b7f9df5..a7ce948acd4 100644
--- a/llvm/test/CodeGen/AMDGPU/sdivrem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdivrem64.ll
@@ -1,8 +1,8 @@
-;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
+;RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-;FUNC-LABEL: {{^}}test_sdiv:
+;FUNC-LABEL: {{^}}s_test_sdiv:
;EG: RECIP_UINT
;EG: LSHL {{.*}}, 1,
;EG: BFE_UINT
@@ -36,47 +36,47 @@
;EG: BFE_UINT
;EG: BFE_UINT
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN: v_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
-;GCN: s_endpgm
-define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN: s_bfe_u32
+; GCN-NOT: v_mad_f32
+; SI-NOT: v_lshr_b64
+; VI-NOT: v_lshrrev_b64
+; GCN: s_endpgm
+define void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = sdiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
ret void
}
-;FUNC-LABEL: {{^}}test_srem:
+;FUNC-LABEL: {{^}}s_test_srem:
;EG: RECIP_UINT
;EG: BFE_UINT
;EG: BFE_UINT
@@ -144,7 +144,7 @@ define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
-define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index a7e777cd046..b9c34c40c39 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -10,14 +10,14 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
ret void
}
-; FIXME: select on 0, 0
; SI-LABEL: {{^}}sint_to_fp_i1_f64:
; SI: v_cmp_eq_i32_e64 vcc,
; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
; uses an SGPR (implicit vcc).
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc
-; SI: buffer_store_dwordx2
+; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
+; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
+
; SI: s_endpgm
define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index c35f5099fbe..2723c0dc95e 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -70,14 +70,13 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i
ret void
}
-; FIXME: select on 0, 0
; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
; SI: v_cmp_eq_i32_e64 vcc
; We can't fold the SGPRs into v_cndmask_b32_e32, because it already
; uses an SGPR (implicit vcc).
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc
-; SI: buffer_store_dwordx2
+; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
+; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
; SI: s_endpgm
define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
OpenPOWER on IntegriCloud