summary refs log tree commit diff stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-08-28 03:47:20 +0000
committerCraig Topper <craig.topper@intel.com>2018-08-28 03:47:20 +0000
commitc7506b28c11b4a382bd499c21ef5e6aac8c3c29f (patch)
treee852db7f307fc70d417f2860cf978d5e18a0b7f8 /llvm/test/CodeGen
parenta6cd4b9bced862533d5ce7ca4dde18d82b623801 (diff)
downloadbcm5719-llvm-c7506b28c11b4a382bd499c21ef5e6aac8c3c29f.tar.gz
bcm5719-llvm-c7506b28c11b4a382bd499c21ef5e6aac8c3c29f.zip
[DAGCombiner][AMDGPU][Mips] Fold bitcast with volatile loads if the resulting load is legal for the target.
Summary:
I'm not sure if this patch is correct or if it needs more qualifying somehow. Bitcast shouldn't change the size of the load so it should be ok? We already do something similar for stores. We'll change the type of a volatile store if the resulting store is Legal or Custom. I'm not sure we should be allowing Custom there...

I was playing around with converting X86 atomic loads/stores (except seq_cst) into regular volatile loads and stores during lowering. This would allow some special RMW isel patterns in X86InstrCompiler.td to be removed. But there's some floating point patterns in there that didn't work because we don't fold (f64 (bitconvert (i64 volatile load))) or (f32 (bitconvert (i32 volatile load))).

Reviewers: efriedma, atanasyan, arsenm

Reviewed By: efriedma

Subscribers: jvesely, arsenm, sdardis, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, arichardson, jrtc27, atanasyan, jfb, llvm-commits

Differential Revision: https://reviews.llvm.org/D50491

llvm-svn: 340797
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll5
-rw-r--r--llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll12
-rw-r--r--llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll12
-rw-r--r--llvm/test/CodeGen/Mips/msa/bitcast.ll25
4 files changed, 19 insertions, 35 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
index 04aef6dad0c..91070878b0f 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
@@ -147,10 +147,7 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
}
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
+; GCN: {{buffer|flat}}_load_dword
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
diff --git a/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll b/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll
index e527866eb97..ca0794b22dc 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll
@@ -18,14 +18,10 @@ entry:
}
; ALL-LABEL: retldouble:
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(fp128)([[R1:\$[0-9]+]])
+; N32-DAG: ldc1 $f0, %lo(fp128)([[R1:\$[0-9]+]])
; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R2]], $f0
-; N32-DAG: dmtc1 [[R4]], $f2
+; N32-DAG: ldc1 $f2, 8([[R3]])
; N64-DAG: lui [[R2:\$[0-9]+]], %highest(fp128)
-; N64-DAG: ld [[R3:\$[0-9]+]], %lo(fp128)([[R2]])
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R2]])
-; N64-DAG: dmtc1 [[R3]], $f0
-; N64-DAG: dmtc1 [[R4]], $f2
+; N64-DAG: ldc1 $f0, %lo(fp128)([[R2]])
+; N64-DAG: ldc1 $f2, 8([[R2]])
diff --git a/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll b/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
index 492db768950..43b0baa7001 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
@@ -23,14 +23,10 @@ entry:
; is returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
; match the de facto ABI as implemented by GCC.
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_fp128)
-; N32-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N32-DAG: dmtc1 [[R2]], $f0
+; N32-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128)
-; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG: dmtc1 [[R4]], $f1
+; N32-DAG: ldc1 $f1, 8([[R3]])
; N64-DAG: lui [[R1:\$[0-9]+]], %highest(struct_fp128)
-; N64-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N64-DAG: dmtc1 [[R2]], $f0
-; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R1]])
-; N64-DAG: dmtc1 [[R4]], $f1
+; N64-DAG: ldc1 $f0, %lo(struct_fp128)([[R1]])
+; N64-DAG: ldc1 $f1, 8([[R1]])
diff --git a/llvm/test/CodeGen/Mips/msa/bitcast.ll b/llvm/test/CodeGen/Mips/msa/bitcast.ll
index 837cc28aa82..f81a9c9c45f 100644
--- a/llvm/test/CodeGen/Mips/msa/bitcast.ll
+++ b/llvm/test/CodeGen/Mips/msa/bitcast.ll
@@ -362,14 +362,13 @@ entry:
}
; LITENDIAN: v8f16_to_v16i8:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v8f16_to_v16i8
; BIGENDIAN: v8f16_to_v16i8:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v8f16_to_v16i8
@@ -431,14 +430,13 @@ entry:
}
; LITENDIAN: v8f16_to_v4i32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4i32
; BIGENDIAN: v8f16_to_v4i32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v8f16_to_v4i32
@@ -455,14 +453,13 @@ entry:
}
; LITENDIAN: v8f16_to_v4f32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4f32
; BIGENDIAN: v8f16_to_v4f32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v8f16_to_v4f32
@@ -479,14 +476,13 @@ entry:
}
; LITENDIAN: v8f16_to_v2i64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2i64
; BIGENDIAN: v8f16_to_v2i64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v8f16_to_v2i64
@@ -503,14 +499,13 @@ entry:
}
; LITENDIAN: v8f16_to_v2f64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2f64
; BIGENDIAN: v8f16_to_v2f64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v8f16_to_v2f64
OpenPOWER on IntegriCloud