author     Matt Arsenault <Matthew.Arsenault@amd.com>	2016-09-09 23:32:53 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>	2016-09-09 23:32:53 +0000
commit     124384f08d4cb9b8c698951ed67fd6db79a15d15 (patch)
tree       bef6b9dcd6420bcf678b0e617646e70bb06947c8
parent     8dc0e0943b9e9b864b91a50e991392db09f2dc49 (diff)
download   bcm5719-llvm-124384f08d4cb9b8c698951ed67fd6db79a15d15.tar.gz
           bcm5719-llvm-124384f08d4cb9b8c698951ed67fd6db79a15d15.zip
AMDGPU: Fix immediate folding logic when shrinking instructions
If the literal is being folded into src0, it doesn't matter if it's an SGPR, because it's being replaced with the literal.

Also fixes initially selecting the 32-bit versions of some instructions, which confused commuting.

llvm-svn: 281117
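To illustrate the folding this change enables, here is a rough before/after sketch of the vector_or_i64_loadimm case from or.ll. The register numbers are made up for illustration, and the sequences are inferred from the old and new CHECK lines rather than taken from verbatim compiler output:

    ; Before: the 32-bit literal was first materialized in an SGPR, and the
    ; old check refused to fold it because src0 was an SGPR.
    s_mov_b32    s4, 0xdf77987f
    v_or_b32_e32 v2, s4, v0

    ; After: src0 is exactly the operand being replaced by the literal, so the
    ; SGPR check is unnecessary and the constant folds into the VOP2 encoding.
    v_or_b32_e32 v2, 0xdf77987f, v0

Presumably, initially selecting the _e64 forms in getVALUOp serves the same goal: the later shrinking pass can commute operands and fold literals itself instead of being constrained by an _e32 encoding chosen up front.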
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp             | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td           |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp    | 10
-rw-r--r--  llvm/test/CodeGen/AMDGPU/ctlz.ll                   |  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fneg-fabs.ll              | 15
-rw-r--r--  llvm/test/CodeGen/AMDGPU/half.ll                   |  4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll  |  4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/or.ll                     |  6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/s_movk_i32.ll             | 16
-rw-r--r--  llvm/test/CodeGen/AMDGPU/si-literal-folding.ll     |  9
10 files changed, 43 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c84847f2e0e..5e0d34d8498 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1852,13 +1852,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
- case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
- case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
- case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
- case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
- case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
- case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
- case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
+ case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
+ case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
+ case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
+ case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
+ case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
+ case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
+ case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 94506f2fcd0..b8f3c10e618 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1871,7 +1871,7 @@ def : Pat <
def : Pat <
(fneg (fabs f32:$src)),
- (S_OR_B32 $src, 0x80000000) // Set sign bit
+ (S_OR_B32 $src, (S_MOV_B32 0x80000000)) // Set sign bit
>;
// FIXME: Should use S_OR_B32
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index c891af7a08f..e72b7d496ab 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -134,7 +134,6 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
MachineOperand &Src0 = MI.getOperand(Src0Idx);
@@ -144,12 +143,6 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
return;
- // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
- // SGPR, we cannot commute the instruction, so we can't fold any literal
- // constants.
- if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
- return;
-
// Try to fold Src0
if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
unsigned Reg = Src0.getReg();
@@ -158,7 +151,8 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
- if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+ if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
+ isUInt<32>(MovSrc.getImm()))) {
Src0.ChangeToImmediate(MovSrc.getImm());
ConstantFolded = true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index 2d5e9f4001d..e9d26a225e3 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -143,7 +143,7 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind
; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
-; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
+; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index 898acc36099..1362fa7a908 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -82,8 +82,10 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in)
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: -PV
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; FIXME: In this case two uses of the constant should be folded
+; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -92,10 +94,11 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index d21d66176a1..aa1f5b7362d 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -16,7 +16,7 @@ define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
-; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[V0]], [[HI]]
+; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[HI]], [[V0]]
; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
@@ -440,7 +440,7 @@ define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspa
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
-; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[SHL]]
+; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[SHL]], [[CVT0]]
; GCN-DAG: buffer_store_dword [[PACKED]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
index 4c559f42d42..7b1373b13f3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
@@ -10,9 +10,7 @@ declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
; VI: s_load_dword [[SRC:s[0-9]+]]
; VI-DAG: v_rsq_f32_e32 [[RSQ:v[0-9]+]], [[SRC]]
; VI-DAG: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
-; TODO: this constant should be folded:
-; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff7fffff
-; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[K]]
+; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xff7fffff, [[MIN]]
; VI: buffer_store_dword [[RESULT]]
define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
diff --git a/llvm/test/CodeGen/AMDGPU/or.ll b/llvm/test/CodeGen/AMDGPU/or.ll
index 9b90ff798ca..56f54cf7c5e 100644
--- a/llvm/test/CodeGen/AMDGPU/or.ll
+++ b/llvm/test/CodeGen/AMDGPU/or.ll
@@ -113,11 +113,9 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
}
; FUNC-LABEL: {{^}}vector_or_i64_loadimm:
-; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
-; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
; SI: s_endpgm
define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
diff --git a/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll
index 47c7fbb6dd6..e422270fc4a 100644
--- a/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}s_movk_i32_k0:
@@ -11,6 +11,7 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 4295032831)
ret void
}
@@ -24,6 +25,7 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 4295000063)
ret void
}
@@ -37,6 +39,7 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 274877939711)
ret void
}
@@ -50,6 +53,7 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 4295000064)
ret void
}
@@ -63,6 +67,7 @@ define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 4295098368)
ret void
}
@@ -77,6 +82,7 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 18374967954648334319)
ret void
}
@@ -90,6 +96,7 @@ define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 270582939713)
ret void
}
@@ -104,10 +111,10 @@ define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 70368744185856)
ret void
}
-
; SI-LABEL: {{^}}s_movk_i32_k8:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
@@ -119,6 +126,7 @@ define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255906816)
ret void
}
@@ -133,6 +141,7 @@ define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255906817)
ret void
}
@@ -147,6 +156,7 @@ define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255909000)
ret void
}
@@ -161,6 +171,7 @@ define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255910911)
ret void
}
@@ -175,5 +186,6 @@ define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
store i64 %or, i64 addrspace(1)* %out
+ call void asm sideeffect "; use $0", "s"(i64 1229782942255902721)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll b/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll
index d5030adc89b..b3f000c8ccd 100644
--- a/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-literal-folding.ll
@@ -1,9 +1,8 @@
-; XFAIL: *
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK-LABEL: {{^}}main:
-; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0xbf4353f8
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; GCN-LABEL: {{^}}main:
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
define amdgpu_vs void @main(float) {
main_body:
%1 = fmul float %0, 0x3FE86A7F00000000