[WebAssembly] Call memcpy for large byval copies.

This fixes very slow compilation on test/CodeGen/Generic/2010-11-04-BigByval.ll. Note that MaxStoresPerMemcpy and friends are not yet carefully tuned, so the cutoff point is currently somewhat arbitrary. However, it's important that there be a cutoff point so that we don't emit unbounded quantities of loads and stores.

llvm-svn: 261050
author: Dan Gohman <dan433584@gmail.com> 2016-02-17 01:43:37 +0000
committer: Dan Gohman <dan433584@gmail.com> 2016-02-17 01:43:37 +0000
commit: 476ffcec04db048a58047004e98854e3aee482ef (patch)
tree: e5d73ce0c38f86c79d73dc8d8c44e450be1fcc81
parent: f55ebf0e3952fa5578dd8553b13130154bc0f71e (diff)
download: bcm5719-llvm-476ffcec04db048a58047004e98854e3aee482ef.tar.gz
bcm5719-llvm-476ffcec04db048a58047004e98854e3aee482ef.zip
2 files changed, 15 insertions, 18 deletions
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 796b42c77ce..7b3649b5a09 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -321,7 +321,7 @@ SDValue WebAssemblyTargetLowering::LowerCall(
       SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
       Chain = DAG.getMemcpy(
           Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(),
-          /*isVolatile*/ false, /*AlwaysInline=*/true,
+          /*isVolatile*/ false, /*AlwaysInline=*/false,
           /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
       OutVal = FINode;
     }
diff --git a/llvm/test/CodeGen/WebAssembly/byval.ll b/llvm/test/CodeGen/WebAssembly/byval.ll
index 9a9ab6500c8..d5978d0c064 100644
--- a/llvm/test/CodeGen/WebAssembly/byval.ll
+++ b/llvm/test/CodeGen/WebAssembly/byval.ll
@@ -81,23 +81,6 @@ define void @byval_arg_double(%AlignedStruct* %ptr) {
  ret void
 }
 
-; CHECK-LABEL: byval_arg_big
-define void @byval_arg_big(%BigArray* %ptr) {
- ; CHECK: .param i32
- ; Subtract 48 from SP (SP is 16-byte aligned)
- ; CHECK: i32.const [[L2:.+]]=, 48
- ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, [[L2]]
- ; Copy the AlignedStruct argument to the stack (SP+12, original SP-36)
- ; CHECK: i64.load $push[[L4:.+]]=, 0($0):p2align=0
- ; CHECK: i64.store {{.*}}=, 12([[SP]]):p2align=2, $pop[[L4]]
- ; Pass a pointer to the stack slot to the function
- ; CHECK-NEXT: i32.const [[L5:.+]]=, 12
- ; CHECK-NEXT: i32.add [[ARG:.+]]=, [[SP]], [[L5]]
- ; CHECK-NEXT: call ext_byval_func_bigarray@FUNCTION, [[ARG]]
- call void @ext_byval_func_bigarray(%BigArray* byval %ptr)
- ret void
-}
-
 ; CHECK-LABEL: byval_param
 define void @byval_param(%SmallStruct* byval align 32 %ptr) {
  ; CHECK: .param i32
@@ -122,3 +105,17 @@ define void @byval_empty_callee(%EmptyStruct* byval %ptr) {
  call void @ext_func_empty(%EmptyStruct* %ptr)
  ret void
 }
+
+; Call memcpy for "big" byvals.
+; TODO: When the prolog/epilog sequences are optimized, refine these checks to
+; be more specific.
+
+; CHECK-LABEL: big_byval:
+; CHECK:      i32.call       ${{[^,]+}}=, memcpy@FUNCTION,
+; CHECK-NEXT: call           big_byval_callee@FUNCTION,
+%big = type [131072 x i8]
+declare void @big_byval_callee(%big* byval align 1)
+define void @big_byval(%big* byval align 1 %x) {
+  call void @big_byval_callee(%big* byval align 1 %x)
+  ret void
+}
author	Dan Gohman <dan433584@gmail.com>	2016-02-17 01:43:37 +0000
committer	Dan Gohman <dan433584@gmail.com>	2016-02-17 01:43:37 +0000
commit	476ffcec04db048a58047004e98854e3aee482ef (patch)
tree	e5d73ce0c38f86c79d73dc8d8c44e450be1fcc81
parent	f55ebf0e3952fa5578dd8553b13130154bc0f71e (diff)
download	bcm5719-llvm-476ffcec04db048a58047004e98854e3aee482ef.tar.gz bcm5719-llvm-476ffcec04db048a58047004e98854e3aee482ef.zip