diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-09 17:49:14 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-09 17:49:14 +0000 |
| commit | 38d8ed2b75122ed9b222012b0aec8ceb378599d8 (patch) | |
| tree | 0b2cf374c00a93440241690de4e48000fa4bc862 /llvm/test | |
| parent | 52facf0195a16d82985b6025058be61eb1d85db9 (diff) | |
| download | bcm5719-llvm-38d8ed2b75122ed9b222012b0aec8ceb378599d8.tar.gz bcm5719-llvm-38d8ed2b75122ed9b222012b0aec8ceb378599d8.zip | |
AMDGPU: Fix i128 mul
llvm-svn: 289231
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/mul.ll | 71 |
1 files changed, 71 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll
index 5ceef7fda82..c2fae4cfd2b 100644
--- a/llvm/test/CodeGen/AMDGPU/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul.ll
@@ -198,3 +198,74 @@ endif:
   store i64 %3, i64 addrspace(1)* %out
   ret void
 }
+
+; FIXME: Load dwordx4
+; FUNC-LABEL: {{^}}s_mul_i128:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+
+; SI: v_mul_hi_u32
+; SI: v_mul_hi_u32
+; SI: s_mul_i32
+; SI: v_mul_hi_u32
+; SI: s_mul_i32
+; SI: s_mul_i32
+; SI: v_mul_hi_u32
+; SI: v_mul_hi_u32
+; SI: s_mul_i32
+; SI-DAG: s_mul_i32
+; SI-DAG: v_mul_hi_u32
+; SI: s_mul_i32
+; SI: s_mul_i32
+; SI: s_mul_i32
+; SI: s_mul_i32
+; SI: s_mul_i32
+
+; SI: buffer_store_dwordx4
+define void @s_mul_i128(i128 addrspace(1)* %out, i128 %a, i128 %b) nounwind #0 {
+  %mul = mul i128 %a, %b
+  store i128 %mul, i128 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_mul_i128:
+; SI: {{buffer|flat}}_load_dwordx4
+; SI: {{buffer|flat}}_load_dwordx4
+
+; SI: v_mul_lo_i32
+; SI: v_mul_hi_u32
+; SI: v_mul_hi_u32
+; SI: v_mul_lo_i32
+; SI: v_mul_hi_u32
+; SI: v_mul_hi_u32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+; SI: v_add_i32_e32
+; SI: v_mul_hi_u32
+; SI: v_mul_lo_i32
+; SI: v_mul_hi_u32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+
+; SI: {{buffer|flat}}_store_dwordx4
+define void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %aptr, i128 addrspace(1)* %bptr) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %gep.a = getelementptr inbounds i128, i128 addrspace(1)* %aptr, i32 %tid
+  %gep.b = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
+  %gep.out = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
+  %a = load i128, i128 addrspace(1)* %gep.a
+  %b = load i128, i128 addrspace(1)* %gep.b
+  %mul = mul i128 %a, %b
+  store i128 %mul, i128 addrspace(1)* %gep.out
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone}
```

