summary refs log tree commit diff stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-08-28 03:47:20 +0000
committerCraig Topper <craig.topper@intel.com>2018-08-28 03:47:20 +0000
commitc7506b28c11b4a382bd499c21ef5e6aac8c3c29f (patch)
treee852db7f307fc70d417f2860cf978d5e18a0b7f8 /llvm/test/CodeGen
parenta6cd4b9bced862533d5ce7ca4dde18d82b623801 (diff)
downloadbcm5719-llvm-c7506b28c11b4a382bd499c21ef5e6aac8c3c29f.tar.gz
bcm5719-llvm-c7506b28c11b4a382bd499c21ef5e6aac8c3c29f.zip
[DAGCombiner][AMDGPU][Mips] Fold bitcast with volatile loads if the resulting load is legal for the target.
Summary:
I'm not sure if this patch is correct or if it needs more qualifying somehow. Bitcast shouldn't change the size of the load so it should be ok? We already do something similar for stores. We'll change the type of a volatile store if the resulting store is Legal or Custom. I'm not sure we should be allowing Custom there...

I was playing around with converting X86 atomic loads/stores (except seq_cst) into regular volatile loads and stores during lowering. This would allow some special RMW isel patterns in X86InstrCompiler.td to be removed. But there's some floating point patterns in there that didn't work because we don't fold (f64 (bitconvert (i64 volatile load))) or (f32 (bitconvert (i32 volatile load))).

Reviewers: efriedma, atanasyan, arsenm

Reviewed By: efriedma

Subscribers: jvesely, arsenm, sdardis, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, arichardson, jrtc27, atanasyan, jfb, llvm-commits

Differential Revision: https://reviews.llvm.org/D50491

llvm-svn: 340797
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll5
-rw-r--r--llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll12
-rw-r--r--llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll12
-rw-r--r--llvm/test/CodeGen/Mips/msa/bitcast.ll25
4 files changed, 19 insertions, 35 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
index 04aef6dad0c..91070878b0f 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
@@ -147,10 +147,7 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
}
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
+; GCN: {{buffer|flat}}_load_dword
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
diff --git a/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll b/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll
index e527866eb97..ca0794b22dc 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll
@@ -18,14 +18,10 @@ entry:
}
; ALL-LABEL: retldouble:
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(fp128)([[R1:\$[0-9]+]])
+; N32-DAG: ldc1 $f0, %lo(fp128)([[R1:\$[0-9]+]])
; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R2]], $f0
-; N32-DAG: dmtc1 [[R4]], $f2
+; N32-DAG: ldc1 $f2, 8([[R3]])
; N64-DAG: lui [[R2:\$[0-9]+]], %highest(fp128)
-; N64-DAG: ld [[R3:\$[0-9]+]], %lo(fp128)([[R2]])
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R2]])
-; N64-DAG: dmtc1 [[R3]], $f0
-; N64-DAG: dmtc1 [[R4]], $f2
+; N64-DAG: ldc1 $f0, %lo(fp128)([[R2]])
+; N64-DAG: ldc1 $f2, 8([[R2]])
diff --git a/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll b/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
index 492db768950..43b0baa7001 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
@@ -23,14 +23,10 @@ entry:
; is returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
; match the de facto ABI as implemented by GCC.
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_fp128)
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N32-DAG: dmtc1 [[R2]], $f0
+; N32-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R4]], $f1
+; N32-DAG: ldc1 $f1, 8([[R3]])
; N64-DAG: lui [[R1:\$[0-9]+]], %highest(struct_fp128)
-; N64-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N64-DAG: dmtc1 [[R2]], $f0
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R1]])
-; N64-DAG: dmtc1 [[R4]], $f1
+; N64-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
+; N64-DAG: ldc1 $f1, 8([[R1]])
diff --git a/llvm/test/CodeGen/Mips/msa/bitcast.ll b/llvm/test/CodeGen/Mips/msa/bitcast.ll
index 837cc28aa82..f81a9c9c45f 100644
--- a/llvm/test/CodeGen/Mips/msa/bitcast.ll
+++ b/llvm/test/CodeGen/Mips/msa/bitcast.ll
@@ -362,14 +362,13 @@ entry:
}
; LITENDIAN: v8f16_to_v16i8:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v8f16_to_v16i8
; BIGENDIAN: v8f16_to_v16i8:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v8f16_to_v16i8
@@ -431,14 +430,13 @@ entry:
}
; LITENDIAN: v8f16_to_v4i32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4i32
; BIGENDIAN: v8f16_to_v4i32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v8f16_to_v4i32
@@ -455,14 +453,13 @@ entry:
}
; LITENDIAN: v8f16_to_v4f32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4f32
; BIGENDIAN: v8f16_to_v4f32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v8f16_to_v4f32
@@ -479,14 +476,13 @@ entry:
}
; LITENDIAN: v8f16_to_v2i64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2i64
; BIGENDIAN: v8f16_to_v2i64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v8f16_to_v2i64
@@ -503,14 +499,13 @@ entry:
}
; LITENDIAN: v8f16_to_v2f64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2f64
; BIGENDIAN: v8f16_to_v2f64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v8f16_to_v2f64
OpenPOWER on IntegriCloud