From 1e68724d24ba38de7c7cdb2e1939d78c8b37cc0d Mon Sep 17 00:00:00 2001
From: Daniel Neilson
Date: Fri, 19 Jan 2018 17:13:12 +0000
Subject: Remove alignment argument from memcpy/memmove/memset in favour of
 alignment attributes (Step 1)

Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
  http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
  http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html

The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.

This change is the first in a series that allows source and dest to each have
their own alignments by using the alignment attribute on their arguments.

In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We temporarily
   require that the alignments for source & dest be equal.

For example, code which used to read:

  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)

will now read:

  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)

Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended
sed script may help with updating the majority of your tests, but it does not
catch all possible patterns, so some manual checking and updating will be
required:

s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g

The remaining changes in the series will:

Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with
   differing source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new
   IRBuilder API, and those that use MemIntrinsicInst::[get|set]Alignment()
   to use getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
   MemIntrinsicInst::[get|set]Alignment() methods.
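As a usage sketch (the file name update-mem-intrinsics.sed below is
illustrative, not part of this change): save the rules above to
update-mem-intrinsics.sed and apply them to a test tree with sed in
extended-regex mode, for example:

  find . -name '*.ll' -print0 | xargs -0 sed -r -i -f update-mem-intrinsics.sed

(GNU sed; BSD sed takes -E and -i '' instead of -r -i.) Re-run your lit tests
afterwards, since the script deliberately does not cover every possible
pattern.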
Reviewers: pete, hfinkel, lhames, reames, bollu

Reviewed By: reames

Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm,
 dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle,
 javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton,
 JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos,
 sabuasal, llvm-commits

Differential Revision: https://reviews.llvm.org/D41675

llvm-svn: 322965
---
 .../AArch64/GlobalISel/arm64-irtranslator.ll | 12 +-
 llvm/test/CodeGen/AArch64/PBQP-csr.ll | 4 +-
 ...64-DAGCombine-findBetterNeighborChains-crash.ll | 6 +-
 .../AArch64/arm64-2012-05-07-MemcpyAlignBug.ll | 4 +-
 llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll | 4 +-
 llvm/test/CodeGen/AArch64/arm64-abi_align.ll | 18 +--
 .../CodeGen/AArch64/arm64-fast-isel-intrinsic.ll | 24 +--
 llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll | 20 +--
 llvm/test/CodeGen/AArch64/arm64-memset-inline.ll | 8 +-
 llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll | 8 +-
 .../AArch64/arm64-misaligned-memcpy-inline.ll | 4 +-
 .../CodeGen/AArch64/arm64-misched-basic-A53.ll | 6 +-
 .../CodeGen/AArch64/arm64-misched-basic-A57.ll | 6 +-
 llvm/test/CodeGen/AArch64/arm64-stur.ll | 4 +-
 llvm/test/CodeGen/AArch64/arm64-virtual_base.ll | 4 +-
 llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll | 4 +-
 llvm/test/CodeGen/AArch64/func-argpassing.ll | 4 +-
 .../AArch64/ldp-stp-scaled-unscaled-pairs.ll | 4 +-
 llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll | 4 +-
 llvm/test/CodeGen/AArch64/ldst-zero.ll | 4 +-
 llvm/test/CodeGen/AArch64/machine-combiner-madd.ll | 4 +-
 llvm/test/CodeGen/AArch64/memcpy-f128.ll | 4 +-
 .../test/CodeGen/AArch64/merge-store-dependency.ll | 4 +-
 .../CodeGen/AArch64/mergestores_noimplicitfloat.ll | 4 +-
 llvm/test/CodeGen/AArch64/misched-stp.ll | 4 +-
 llvm/test/CodeGen/AArch64/pr33172.ll | 4 +-
 .../CodeGen/AArch64/tailcall-mem-intrinsics.ll | 12 +-
 llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll | 4 +-
 .../CodeGen/AMDGPU/addrspacecast-constantexpr.ll | 4 +-
 llvm/test/CodeGen/AMDGPU/lds-alignment.ll | 128 ++++++++--------
 llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll | 28 ++--
 llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll | 42 +++---
 .../AMDGPU/promote-alloca-mem-intrinsics.ll | 30 ++--
 llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll | 4 +-
 llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll | 4 +-
 .../test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll | 4 +-
 llvm/test/CodeGen/ARM/2011-10-26-memset-inline.ll | 4 +-
 .../CodeGen/ARM/2011-10-26-memset-with-neon.ll | 4 +-
 .../CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll | 2 +-
 llvm/test/CodeGen/ARM/Windows/memset.ll | 4 +-
 llvm/test/CodeGen/ARM/Windows/no-aeabi.ll | 8 +-
 llvm/test/CodeGen/ARM/arm-eabi.ll | 14 +-
 llvm/test/CodeGen/ARM/constantpool-promote-ldrh.ll | 4 +-
 llvm/test/CodeGen/ARM/constantpool-promote.ll | 14 +-
 llvm/test/CodeGen/ARM/crash-O0.ll | 6 +-
 llvm/test/CodeGen/ARM/debug-info-blocks.ll | 6 +-
 llvm/test/CodeGen/ARM/dyn-stackalloc.ll | 4 +-
 llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll | 20 +--
 llvm/test/CodeGen/ARM/interval-update-remat.ll | 4 +-
 .../CodeGen/ARM/ldm-stm-base-materialization.ll | 10 +-
 llvm/test/CodeGen/ARM/machine-cse-cmp.ll | 4 +-
 llvm/test/CodeGen/ARM/memcpy-inline.ll | 20 +--
 llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll | 20 +--
 llvm/test/CodeGen/ARM/memcpy-no-inline.ll | 6 +-
 llvm/test/CodeGen/ARM/memfunc.ll | 90 +++++------
 llvm/test/CodeGen/ARM/memset-inline.ll | 12 +-
 .../CodeGen/ARM/stack-protector-bmovpcb_call.ll | 4 +-
 llvm/test/CodeGen/ARM/struct-byval-frame-index.ll | 8 +-
 llvm/test/CodeGen/ARM/tailcall-mem-intrinsics.ll | 12 +-
 .../test/CodeGen/AVR/std-ldd-immediate-overflow.ll | 4 +-
 llvm/test/CodeGen/BPF/byval.ll | 4 +-
 llvm/test/CodeGen/BPF/ex1.ll | 6 +-
 llvm/test/CodeGen/BPF/fi_ri.ll | 4 +-
 llvm/test/CodeGen/BPF/reloc.ll | 6 +-
 llvm/test/CodeGen/BPF/rodata_1.ll | 6 +-
 llvm/test/CodeGen/BPF/rodata_2.ll | 4 +-
 llvm/test/CodeGen/BPF/rodata_3.ll | 4 +-
 llvm/test/CodeGen/BPF/rodata_4.ll | 4 +-
 llvm/test/CodeGen/BPF/sanity.ll | 4 +-
 llvm/test/CodeGen/BPF/undef.ll | 4 +-
 llvm/test/CodeGen/BPF/warn-call.ll | 4 +-
 llvm/test/CodeGen/Generic/ForceStackAlign.ll | 4 +-
 llvm/test/CodeGen/Generic/invalid-memcpy.ll | 4 +-
 .../test/CodeGen/Hexagon/adjust-latency-stackST.ll | 6 +-
 .../CodeGen/Hexagon/branchfolder-keep-impdef.ll | 4 +-
 .../CodeGen/Hexagon/early-if-conversion-bug1.ll | 12 +-
 llvm/test/CodeGen/Hexagon/mem-fi-add.ll | 4 +-
 llvm/test/CodeGen/Hexagon/memcpy-likely-aligned.ll | 4 +-
 llvm/test/CodeGen/Hexagon/rdf-filter-defs.ll | 10 +-
 .../test/CodeGen/Hexagon/store-imm-stack-object.ll | 6 +-
 .../CodeGen/Hexagon/tail-call-mem-intrinsics.ll | 12 +-
 llvm/test/CodeGen/MSP430/memset.ll | 4 +-
 llvm/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll | 4 +-
 llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll | 15 +-
 llvm/test/CodeGen/Mips/biggot.ll | 4 +-
 ...rguments-small-structures-bigger-than-32bits.ll | 6 +-
 .../cconv/arguments-varargs-small-structs-byte.ll | 12 +-
 ...arguments-varargs-small-structs-combinations.ll | 6 +-
 llvm/test/CodeGen/Mips/cconv/return-struct.ll | 6 +-
 llvm/test/CodeGen/Mips/largeimmprinting.ll | 4 +-
 llvm/test/CodeGen/Mips/long-calls.ll | 4 +-
 llvm/test/CodeGen/Mips/memcpy.ll | 4 +-
 llvm/test/CodeGen/Mips/pr33978.ll | 6 +-
 llvm/test/CodeGen/Mips/tailcall/tailcall.ll | 4 +-
 llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll | 20 +--
 .../CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll | 2 +-
 .../PowerPC/2011-12-06-SpillAndRestoreCR.ll | 2 +-
 llvm/test/CodeGen/PowerPC/MMO-flags-assertion.ll | 4 +-
 .../CodeGen/PowerPC/aantidep-inline-asm-use.ll | 4 +-
 llvm/test/CodeGen/PowerPC/ctrloop-reg.ll | 2 +-
 llvm/test/CodeGen/PowerPC/emptystruct.ll | 4 +-
 llvm/test/CodeGen/PowerPC/fsl-e500mc.ll | 4 +-
 llvm/test/CodeGen/PowerPC/fsl-e5500.ll | 4 +-
 llvm/test/CodeGen/PowerPC/glob-comp-aa-crash.ll | 4 +-
 llvm/test/CodeGen/PowerPC/isel-rc-nox0.ll | 4 +-
 llvm/test/CodeGen/PowerPC/licm-remat.ll | 8 +-
 .../CodeGen/PowerPC/lxv-aligned-stack-slots.ll | 6 +-
 llvm/test/CodeGen/PowerPC/memcpy-vec.ll | 12 +-
 .../test/CodeGen/PowerPC/memcpy_dereferenceable.ll | 12 +-
 llvm/test/CodeGen/PowerPC/memset-nc-le.ll | 4 +-
 llvm/test/CodeGen/PowerPC/memset-nc.ll | 6 +-
 llvm/test/CodeGen/PowerPC/merge-st-chain-op.ll | 4 +-
 llvm/test/CodeGen/PowerPC/ppc-empty-fs.ll | 4 +-
 llvm/test/CodeGen/PowerPC/pr27350.ll | 4 +-
 llvm/test/CodeGen/PowerPC/resolvefi-basereg.ll | 28 ++--
 llvm/test/CodeGen/PowerPC/resolvefi-disp.ll | 16 +-
 llvm/test/CodeGen/PowerPC/structsinmem.ll | 30 ++--
 llvm/test/CodeGen/PowerPC/structsinregs.ll | 30 ++--
 llvm/test/CodeGen/PowerPC/stwu8.ll | 4 +-
 llvm/test/CodeGen/PowerPC/tailcall-string-rvo.ll | 4 +-
 llvm/test/CodeGen/PowerPC/toc-load-sched-bug.ll | 14 +-
 llvm/test/CodeGen/RISCV/frame.ll | 4 +-
 llvm/test/CodeGen/SystemZ/dag-combine-02.ll | 4 +-
 llvm/test/CodeGen/SystemZ/loop-01.ll | 10 +-
 llvm/test/CodeGen/SystemZ/loop-03.ll | 6 +-
 llvm/test/CodeGen/SystemZ/memcpy-01.ll | 92 +++++-------
 llvm/test/CodeGen/SystemZ/memset-01.ll | 62 ++++----
 llvm/test/CodeGen/SystemZ/memset-02.ll | 68 ++++-----
 llvm/test/CodeGen/SystemZ/memset-03.ll | 164 ++++++++++-----------
 llvm/test/CodeGen/SystemZ/memset-04.ll | 164 ++++++++++-----------
 .../CodeGen/SystemZ/tail-call-mem-intrinsics.ll | 12 +-
 llvm/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll | 10 +-
 llvm/test/CodeGen/Thumb/dyn-stackalloc.ll | 4 +-
 .../Thumb/ldm-stm-base-materialization-thumb2.ll | 10 +-
 .../CodeGen/Thumb/ldm-stm-base-materialization.ll | 10 +-
 .../Thumb/stack-coloring-without-frame-ptr.ll | 4 +-
 .../CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll | 4 +-
 llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll | 4 +-
 llvm/test/CodeGen/WebAssembly/global.ll | 4 +-
 llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll | 26 ++--
 llvm/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll | 2 +-
 llvm/test/CodeGen/X86/2009-01-25-NoSSE.ll | 4 +-
 llvm/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll | 4 +-
 llvm/test/CodeGen/X86/2010-04-08-CoalescerBug.ll | 4 +-
 llvm/test/CodeGen/X86/2010-04-21-CoalescerBug.ll | 4 +-
 .../X86/2010-06-25-CoalescerSubRegDefDead.ll | 4 +-
 .../CodeGen/X86/2010-09-17-SideEffectsInChain.ll | 4 +-
 .../CodeGen/X86/2012-01-10-UndefExceptionEdge.ll | 8 +-
 llvm/test/CodeGen/X86/alignment-2.ll | 4 +-
 llvm/test/CodeGen/X86/bug26810.ll | 4 +-
 llvm/test/CodeGen/X86/darwin-bzero.ll | 4 +-
 llvm/test/CodeGen/X86/fast-isel-call.ll | 8 +-
 llvm/test/CodeGen/X86/fast-isel-deadcode.ll | 6 +-
 llvm/test/CodeGen/X86/fast-isel-x86-64.ll | 4 +-
 llvm/test/CodeGen/X86/force-align-stack-alloca.ll | 4 +-
 llvm/test/CodeGen/X86/immediate_merging.ll | 4 +-
 llvm/test/CodeGen/X86/immediate_merging64.ll | 4 +-
 llvm/test/CodeGen/X86/lea-opt-memop-check-1.ll | 4 +-
 llvm/test/CodeGen/X86/load-slice.ll | 2 +-
 llvm/test/CodeGen/X86/lsr-normalization.ll | 4 +-
 llvm/test/CodeGen/X86/mcu-abi.ll | 4 +-
 llvm/test/CodeGen/X86/mem-intrin-base-reg.ll | 10 +-
 llvm/test/CodeGen/X86/memcpy-2.ll | 10 +-
 llvm/test/CodeGen/X86/memcpy-from-string.ll | 4 +-
 llvm/test/CodeGen/X86/memcpy.ll | 24 +--
 llvm/test/CodeGen/X86/memset-2.ll | 10 +-
 llvm/test/CodeGen/X86/memset-3.ll | 4 +-
 llvm/test/CodeGen/X86/memset-nonzero.ll | 12 +-
 .../CodeGen/X86/memset-sse-stack-realignment.ll | 6 +-
 llvm/test/CodeGen/X86/memset.ll | 6 +-
 llvm/test/CodeGen/X86/memset64-on-x86-32.ll | 4 +-
 llvm/test/CodeGen/X86/misaligned-memset.ll | 4 +-
 llvm/test/CodeGen/X86/misched-new.ll | 4 +-
 llvm/test/CodeGen/X86/negate-add-zero.ll | 2 +-
 llvm/test/CodeGen/X86/optimize-max-0.ll | 20 +--
 llvm/test/CodeGen/X86/pr11985.ll | 4 +-
 llvm/test/CodeGen/X86/pr14333.ll | 4 +-
 llvm/test/CodeGen/X86/pr34088.ll | 6 +-
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 4 +-
 llvm/test/CodeGen/X86/regparm.ll | 6 +-
 llvm/test/CodeGen/X86/remat-fold-load.ll | 6 +-
 llvm/test/CodeGen/X86/slow-unaligned-mem.ll | 4 +-
 llvm/test/CodeGen/X86/small-byval-memcpy.ll | 4 +-
 llvm/test/CodeGen/X86/stack-align.ll | 4 +-
 llvm/test/CodeGen/X86/stack-protector.ll | 6 +-
 .../CodeGen/X86/tail-dup-merge-loop-headers.ll | 4 +-
 llvm/test/CodeGen/X86/tailcall-mem-intrinsics.ll | 18 +-
 llvm/test/CodeGen/X86/tlv-1.ll | 4 +-
 llvm/test/CodeGen/X86/unaligned-load.ll | 4 +-
 llvm/test/CodeGen/X86/unused_stackslots.ll | 18 +-
 llvm/test/CodeGen/X86/unwindraise.ll | 6 +-
 .../CodeGen/X86/variable-sized-darwin-bzero.ll | 4 +-
 llvm/test/CodeGen/X86/vectorcall.ll | 12 +-
 llvm/test/CodeGen/X86/x86-64-static-relo-movl.ll | 4 +-
 llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll | 4 +-
 llvm/test/CodeGen/XCore/memcpy.ll | 8 +-
 196 files changed, 1070 insertions(+), 1091 deletions(-)

(limited to 'llvm/test/CodeGen')

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index f710cd7551d..077c21c0557 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -1147,7 +1147,7 @@ define void()* @test_global_func() {
   ret void()* @allocai64
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
 define void @test_memcpy(i8* %dst, i8* %src, i64 %size) {
 ; CHECK-LABEL: name: test_memcpy
 ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0
@@ -1157,11 +1157,11 @@ define void @test_memcpy(i8* %dst, i8* %src, i64 %size) {
 ; CHECK: %x1 = COPY [[SRC]]
 ; CHECK: %x2 = COPY [[SIZE]]
 ; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0)
   ret void
 }
 
-declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1)
 define void @test_memmove(i8* %dst, i8* %src, i64 %size) {
 ; CHECK-LABEL: name: test_memmove
 ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0
@@ -1171,11 +1171,11 @@ define void @test_memmove(i8* %dst, i8* %src, i64 %size) {
 ; CHECK: %x1 = COPY [[SRC]]
 ; CHECK: %x2 = COPY [[SIZE]]
 ; CHECK: BL &memmove, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2
-  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0)
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32 %align, i1 %volatile)
+declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
 define void @test_memset(i8* %dst, i8 %val, i64 %size) {
 ; CHECK-LABEL: name: test_memset
 ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0
@@ -1187,7 +1187,7 @@ define void @test_memset(i8* %dst, i8 %val, i64 %size) {
 ; CHECK: %w1 = COPY [[SRC_TMP]]
 ; CHECK: %x2 = COPY [[SIZE]]
 ; CHECK: BL &memset, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %w1, implicit %x2
-  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i32 1, i1 0)
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i1 0)
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/PBQP-csr.ll b/llvm/test/CodeGen/AArch64/PBQP-csr.ll
index 16d7f8cb7a5..e071eda17e3 100644
--- a/llvm/test/CodeGen/AArch64/PBQP-csr.ll
+++ b/llvm/test/CodeGen/AArch64/PBQP-csr.ll
@@ -22,7 +22,7 @@ entry:
   %z.i60 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 9, i32 2
   %na = getelementptr inbounds %rs, %rs* %r, i64 0, i32 0
   %0 = bitcast double* %x.i to i8*
-  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 72, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 72, i1 false)
   %1 = load i32, i32* %na, align 4
   %cmp70 = icmp sgt i32 %1, 0
   br i1 %cmp70, label %for.body.lr.ph, label %for.end
@@ -87,5 +87,5 @@ for.end: ; preds = %for.end.loopexit, %
 }
 
 ; Function Attrs: nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll
index fb4df34df29..043ce0933a9 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll
@@ -6,13 +6,13 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 declare void @extern(i8*)
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0
 
 ; Function Attrs: nounwind
 define void @func(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 {
 bb:
   %tmp = getelementptr inbounds i8, i8* %arg2, i64 88
-  tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false)
   store i8 0, i8* %arg3
   store i8 2, i8* %arg2
   store float 0.000000e+00, float* %arg
@@ -27,7 +27,7 @@ bb:
 define void @func2(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 {
 bb:
   %tmp = getelementptr inbounds i8, i8* %arg2, i64 88
-  tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false)
   store i8 0, i8* %arg3
   store i8 2, i8* %arg2
   store float 0.000000e+00, float* %arg
diff --git a/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll b/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
index b38b4f2a2b2..2b6cd7c2d28 100644
--- a/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
@@ -14,8 +14,8 @@
 ; CHECK-NEXT: str [[VAL2]], [x0]
 
 define void @foo(i8* %a) {
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([3 x i32]* @b to i8*), i64 12, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast ([3 x i32]* @b to i8*), i64 12, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
index d6a1686d566..e0fa5dbbaf9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -164,10 +164,10 @@ entry:
   %4 = bitcast i8* %ap.align to %struct.s41*
   %5 = bitcast %struct.s41* %vs to i8*
   %6 = bitcast %struct.s41* %4 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 16, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 %6, i64 16, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 define void @bar2(i32 %x, i128 %s41.coerce) nounwind {
 entry:
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
index 56a882a2a15..bfb74b598ff 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -300,14 +300,14 @@ entry:
   %tmp = alloca %struct.s42, align 4
   %tmp1 = alloca %struct.s42, align 4
   %0 = bitcast %struct.s42* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
   %1 = bitcast %struct.s42* %tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
   %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
   ret i32 %call
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4
 
 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                        i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
@@ -346,9 +346,9 @@ entry:
   %tmp = alloca %struct.s42, align 4
   %tmp1 = alloca %struct.s42, align 4
   %0 = bitcast %struct.s42* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
   %1 = bitcast %struct.s42* %tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
   %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                               i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
   ret i32 %call
@@ -414,9 +414,9 @@ entry:
   %tmp = alloca %struct.s43, align 16
   %tmp1 = alloca %struct.s43, align 16
   %0 = bitcast %struct.s43* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
   %1 = bitcast %struct.s43* %tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
   %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
   ret i32 %call
 }
@@ -465,9 +465,9 @@ entry:
   %tmp = alloca %struct.s43, align 16
   %tmp1 = alloca %struct.s43, align 16
   %0 = bitcast %struct.s43* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
   %1 = bitcast %struct.s43* %tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
   %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                               i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
   ret i32 %call
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
index 1af960a12a1..e43160ab340 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
@@ -11,11 +11,11 @@ define void @t1() {
 ; ARM64: mov x2, #80
 ; ARM64: uxtb w1, w9
 ; ARM64: bl _memset
-  call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i1 false)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
 
 define void @t2() {
 ; ARM64-LABEL: t2
@@ -25,11 +25,11 @@ define void @t2() {
 ; ARM64: add x1, x8, _message@PAGEOFF
 ; ARM64: mov x2, #80
 ; ARM64: bl _memcpy
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
 
 define void @t3() {
 ; ARM64-LABEL: t3
@@ -39,11 +39,11 @@ define void @t3() {
 ; ARM64: add x1, x8, _message@PAGEOFF
 ; ARM64: mov x2, #20
 ; ARM64: bl _memmove
-  call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i1 false)
   ret void
 }
 
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
 
 define void @t4() {
 ; ARM64-LABEL: t4
@@ -58,7 +58,7 @@ define void @t4() {
 ; ARM64: ldrb w11, [x9, #16]
 ; ARM64: strb w11, [x8, #16]
 ; ARM64: ret
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false)
   ret void
 }
 
@@ -75,7 +75,7 @@ define void @t5() {
 ; ARM64: ldrb w11, [x9, #16]
 ; ARM64: strb w11, [x8, #16]
 ; ARM64: ret
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false)
   ret void
 }
 
@@ -92,7 +92,7 @@ define void @t6() {
 ; ARM64: ldrb w10, [x9, #8]
 ; ARM64: strb w10, [x8, #8]
 ; ARM64: ret
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i1 false)
   ret void
 }
 
@@ -111,7 +111,7 @@ define void @t7() {
 ; ARM64: ldrb w10, [x9, #6]
 ; ARM64: strb w10, [x8, #6]
 ; ARM64: ret
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i1 false)
   ret void
 }
 
@@ -130,7 +130,7 @@ define void @t8() {
 ; ARM64: ldrb w10, [x9, #3]
 ; ARM64: strb w10, [x8, #3]
 ; ARM64: ret
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i1 false)
   ret void
 }
 
@@ -143,6 +143,6 @@ define void @test_distant_memcpy(i8* %dst) {
 ; ARM64: strb [[BYTE]], [x0]
   %array = alloca i8, i32 8192
   %elem = getelementptr i8, i8* %array, i32 8000
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i1 false)
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
index 0590031fbcd..4f8f3a227bb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
@@ -22,7 +22,7 @@ entry:
 ; CHECK: strh [[REG1]], [x[[BASEREG2]], #8]
 ; CHECK: ldr [[REG2:x[0-9]+]],
 ; CHECK: str [[REG2]],
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false)
   ret i32 0
 }
 
@@ -33,7 +33,7 @@ entry:
 ; CHECK: stur [[DEST]], [x0, #15]
 ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
 ; CHECK: str [[DEST]], [x0]
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false)
   ret void
 }
 
@@ -45,7 +45,7 @@ entry:
 ; CHECK: str [[REG3]], [x0, #32]
 ; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}]
 ; CHECK: stp [[DEST1]], [[DEST2]], [x0]
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false)
   ret void
 }
 
@@ -56,7 +56,7 @@ entry:
 ; CHECK: str [[REG4]], [x0, #16]
 ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
 ; CHECK: str [[DEST]], [x0]
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false)
   ret void
 }
 
@@ -67,7 +67,7 @@ entry:
 ; CHECK: strh [[REG5]], [x0, #16]
 ; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}]
 ; CHECK: str [[REG6]], [x0]
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false)
   ret void
 }
 
@@ -80,7 +80,7 @@ entry:
 ; CHECK: mov [[REG8:w[0-9]+]],
 ; CHECK: movk [[REG8]],
 ; CHECK: str [[REG8]], [x0]
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false)
   ret void
 }
 
@@ -91,7 +91,7 @@ entry:
 ; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6]
 ; CHECK: ldr
 ; CHECK: str
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false)
   ret void
 }
 
@@ -104,9 +104,9 @@ entry:
 ; CHECK: str [[REG10]], [x0]
   %0 = bitcast %struct.Foo* %a to i8*
   %1 = bitcast %struct.Foo* %b to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
index 8c872cc6150..ecdfcc6673a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -5,7 +5,7 @@ entry:
 ; CHECK-LABEL: t1:
 ; CHECK: str wzr, [x0, #8]
 ; CHECK: str xzr, [x0]
-  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false)
   ret void
 }
 
@@ -17,11 +17,11 @@ entry:
 ; CHECK: str xzr, [sp, #8]
   %buf = alloca [26 x i8], align 1
   %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i1 false)
   call void @something(i8* %0) nounwind
   ret void
 }
 
 declare void @something(i8*) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll b/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
index 3466e1bace5..87a0232c734 100644
--- a/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
@@ -10,11 +10,11 @@
 ; CHECK-LINUX: {{b|bl}} memset
 define void @fct1(i8* nocapture %ptr) {
 entry:
-  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
 
 ; CHECK-LABEL: fct2:
 ; When the size is bigger than 256, change into bzero.
@@ -22,7 +22,7 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
 ; CHECK-LINUX: {{b|bl}} memset
 define void @fct2(i8* nocapture %ptr) {
 entry:
-  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false)
   ret void
 }
 
@@ -33,7 +33,7 @@ entry:
 define void @fct3(i8* nocapture %ptr, i32 %unknown) {
 entry:
   %conv = sext i32 %unknown to i64
-  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
index 85572f2cf0f..7ecf214b4be 100644
--- a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
@@ -7,8 +7,8 @@ define void @t0(i8* %out, i8* %in) {
 ; CHECK: orr w2, wzr, #0x10
 ; CHECK-NEXT: bl _memcpy
 entry:
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
diff --git a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
index 07df9cb32db..f0b9ccc8b5d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -26,9 +26,9 @@ entry:
   %yy = alloca i32, align 4
   store i32 0, i32* %retval
   %0 = bitcast [8 x i32]* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false)
   %1 = bitcast [8 x i32]* %y to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false)
   store i32 0, i32* %xx, align 4
   store i32 0, i32* %yy, align 4
   store i32 0, i32* %i, align 4
@@ -105,7 +105,7 @@ define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) {
 }
 
 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
index 711d2f7397b..c2f53e88a95 100644
--- a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
@@ -32,9 +32,9 @@ entry:
   %yy = alloca i32, align 4
   store i32 0, i32* %retval
   %0 = bitcast [8 x i32]* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false)
   %1 = bitcast [8 x i32]* %y to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false)
   store i32 0, i32* %xx, align 4
   store i32 0, i32* %yy, align 4
   store i32 0, i32* %i, align 4
@@ -106,7 +106,7 @@ for.end: ; preds = %for.cond
 
 
 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/arm64-stur.ll b/llvm/test/CodeGen/AArch64/arm64-stur.ll
index 4a3229a39b5..8e0736c4fba 100644
--- a/llvm/test/CodeGen/AArch64/arm64-stur.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-stur.ll
@@ -55,11 +55,11 @@ define void @foo(%struct.X* nocapture %p) nounwind optsize ssp {
 ; CHECK-NEXT: ret
   %B = getelementptr inbounds %struct.X, %struct.X* %p, i64 0, i32 1
   %val = bitcast i64* %B to i8*
-  call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i1 false)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 ; Unaligned 16b stores are split into 8b stores for performance.
 ; radar://15424193
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll b/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
index 4ecfde4f83e..4ce0d2f0007 100644
--- a/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
@@ -43,9 +43,9 @@ entry:
   %tmp14 = bitcast double* %arraydecay5.3.1 to i8*
   %arraydecay11.3.1 = getelementptr inbounds %struct.Bicubic_Patch_Struct, %struct.Bicubic_Patch_Struct* %Shape, i64 0, i32 12, i64 1, i64 3, i64 0
   %tmp15 = bitcast double* %arraydecay11.3.1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i1 false)
   ret void
 }
 
 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll b/llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll
index 07595a954db..290e0c918ad 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll
@@ -8,8 +8,8 @@ define void @test(i64 %a, i8* %b) {
   %1 = and i64 %a, 9223372036854775807
   %2 = inttoptr i64 %1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %b, i64 8, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %b, i64 8, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
diff --git a/llvm/test/CodeGen/AArch64/func-argpassing.ll b/llvm/test/CodeGen/AArch64/func-argpassing.ll
index cf6545dab38..824a1893940 100644
--- a/llvm/test/CodeGen/AArch64/func-argpassing.ll
+++ b/llvm/test/CodeGen/AArch64/func-argpassing.ll
@@ -186,11 +186,11 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
 
 define i32 @test_extern() {
 ; CHECK-LABEL: test_extern:
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 undef, i8* align 4 undef, i32 undef, i1 0)
 ; CHECK: bl memcpy
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll b/llvm/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll
index 35117a147ee..951bd4ada3c 100644
--- a/llvm/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll
+++ b/llvm/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll
@@ -115,11 +115,11 @@ entry:
   %C = getelementptr inbounds [12 x i8], [12 x i8]* %a2, i64 0, i64 4
   %1 = bitcast i8* %C to i64*
   store i64 0, i64* %1, align 4
-  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 8, i1 false)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
 
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
index 9c698b5fdcc..0f8ffb50c8d 100644
--- a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
@@ -5,7 +5,7 @@ target triple = "aarch64--linux-gnu"
 declare void @f(i8*, i8*)
 declare void @f2(i8*, i8*)
 declare void @_Z5setupv()
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #3
 
 define i32 @main() local_unnamed_addr #1 {
 ; Make sure the stores happen in the correct order (the exact instructions could change).
@@ -24,7 +24,7 @@ for.body.lr.ph.i.i.i.i.i.i63:
   tail call void @_Z5setupv()
   %x2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 6
   %x3 = bitcast i32* %x2 to i8*
-  call void @llvm.memset.p0i8.i64(i8* %x3, i8 0, i64 16, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %x3, i8 0, i64 16, i1 false)
   %arraydecay2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 0
   %x4 = bitcast [10 x i32]* %b1 to <4 x i32>*
   store <4 x i32> , <4 x i32>* %x4, align 16
diff --git a/llvm/test/CodeGen/AArch64/ldst-zero.ll b/llvm/test/CodeGen/AArch64/ldst-zero.ll
index 7d443a631f9..0ada6fd84cb 100644
--- a/llvm/test/CodeGen/AArch64/ldst-zero.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-zero.ll
@@ -3,7 +3,7 @@
 ; Tests to check that zero stores which are generated as STP xzr, xzr aren't
 ; scheduled incorrectly due to incorrect alias information
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
 %struct.tree_common = type { i8*, i8*, i32 }
 
 ; Original test case which exhibited the bug
@@ -14,7 +14,7 @@ define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) {
 ; CHECK-DAG: str xzr, [x0]
 entry:
   %0 = bitcast %struct.tree_common* %t to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false)
   %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2
   store i32 %code, i32* %code1, align 8
   %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-madd.ll b/llvm/test/CodeGen/AArch64/machine-combiner-madd.ll
index 4efe4e9cfb0..5ace6e63136 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-madd.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-madd.ll
@@ -19,7 +19,7 @@
 %class.D = type { %class.basic_string.base, [4 x i8] }
 %class.basic_string.base = type <{ i64, i64, i32 }>
 @a = global %class.D* zeroinitializer, align 8
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
 define internal void @fun() section ".text.startup" {
 entry:
   %tmp.i.i = alloca %class.D, align 8
@@ -31,7 +31,7 @@ loop:
   %x = load %class.D*, %class.D** getelementptr inbounds (%class.D*, %class.D** @a, i64 0), align 8
   %arrayidx.i.i.i = getelementptr inbounds %class.D, %class.D* %x, i64 %conv11.i.i
   %d = bitcast %class.D* %arrayidx.i.i.i to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %y, i8* %d, i64 24, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 nonnull %y, i8* align 8 %d, i64 24, i1 false)
   %inc.i.i = add i64 %i, 1
   %cmp.i.i = icmp slt i64 %inc.i.i, 0
   br i1 %cmp.i.i, label %loop, label %exit
diff --git a/llvm/test/CodeGen/AArch64/memcpy-f128.ll b/llvm/test/CodeGen/AArch64/memcpy-f128.ll
index 7e6ec36104a..8b91b843108 100644
--- a/llvm/test/CodeGen/AArch64/memcpy-f128.ll
+++ b/llvm/test/CodeGen/AArch64/memcpy-f128.ll
@@ -12,8 +12,8 @@ define void @test1() {
 ; CHECK: str q0
 ; CHECK: ret
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i32 8, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 undef, i8* align 8 bitcast (%structA* @stubA to i8*), i64 48, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index 4f2af9ed7e6..5bed63ef895 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -14,7 +14,7 @@ entry:
 ; A53: str [[DATA]], {{.*}}
 
   %0 = bitcast %struct1* %fde to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 40, i1 false)
   %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4
   store i16 256, i16* %state, align 8
   %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2
@@ -58,6 +58,6 @@ exit:
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
 declare i32 @fcntl(i32, i32, ...)
 declare noalias i8* @foo()
diff --git a/llvm/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll b/llvm/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll
index 74aeaf75d03..cd64ae11550 100644
--- a/llvm/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll
+++ b/llvm/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll
@@ -16,8 +16,8 @@ target triple = "arm64-apple-ios10.0.0"
 ; CHECK-DAG: str [[R3]], [x0, #24]
 
 define void @pr33475(i8* %p0, i8* %p1) noimplicitfloat {
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p0, i8* %p1, i64 32, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p0, i8* align 4 %p1, i64 32, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
diff --git a/llvm/test/CodeGen/AArch64/misched-stp.ll b/llvm/test/CodeGen/AArch64/misched-stp.ll
index 1c9ea68834c..1afec40f192 100644
--- a/llvm/test/CodeGen/AArch64/misched-stp.ll
+++ b/llvm/test/CodeGen/AArch64/misched-stp.ll
@@ -30,7 +30,7 @@ entry:
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
 %struct.tree_common = type { i8*, i8*, i32 }
 
 ; CHECK-LABEL: test_zero
@@ -41,7 +41,7 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
 define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) {
 entry:
   %0 = bitcast %struct.tree_common* %t to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false)
   %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2
   store i32 %code, i32* %code1, align 8
   %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
diff --git a/llvm/test/CodeGen/AArch64/pr33172.ll b/llvm/test/CodeGen/AArch64/pr33172.ll
index 1e1da78b28f..098d5358b02 100644
--- a/llvm/test/CodeGen/AArch64/pr33172.ll
+++ b/llvm/test/CodeGen/AArch64/pr33172.ll
@@ -21,12 +21,12 @@ entry:
   %wide.load8291059.4 = load i64, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.b, i64 0, i64 18) to i64*), align 8
   store i64 %wide.load8281058.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 16) to i64*), align 8
   store i64 %wide.load8291059.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 18) to i64*), align 8
-  tail call void @llvm.memset.p0i8.i64(i8* bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i32 8, i1 false) #2
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i1 false) #2
   unreachable
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
 
 attributes #1 = { argmemonly nounwind }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll b/llvm/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
index b970fb12415..c780d15b58d 100644
--- a/llvm/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
@@ -4,7 +4,7 @@
 ; CHECK: b memcpy
 define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
   ret void
 }
 
@@ -12,7 +12,7 @@ entry:
 ; CHECK: b memmove
 define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
 entry:
-  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
   ret void
 }
 
@@ -20,12 +20,12 @@ entry:
 ; CHECK: b memset
 define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
 entry:
-  tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll b/llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll
index bdc09235afd..d9d2180b5ef 100644
--- a/llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll
+++ b/llvm/test/CodeGen/AArch64/tailcall-string-rvo.ll
@@ -32,7 +32,7 @@ bb:
   %tmp1 = bitcast %class.basic_string.11.42.73* %arg to %union.anon.8.39.70**
   store %union.anon.8.39.70* %tmp, %union.anon.8.39.70** %tmp1, align 8
   %tmp2 = bitcast %union.anon.8.39.70* %tmp to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i1 false)
   %tmp3 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 1
   store i64 13, i64* %tmp3, align 8
   %tmp4 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 2, i32 1, i64 5
@@ -42,6 +42,6 @@ bb:
 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0
 
 attributes #0 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 8cabc7dae13..b40fcb3e492 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
 
-declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i32, i1) #0
+declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0
 
 @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
 @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
@@ -68,7 +68,7 @@ define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrsp
 ; HSA: @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
 define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
-  call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* %out, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i32 4, i1 false)
+  call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll
index c23dea2b6b7..84c8d9b778c 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll
@@ -9,16 +9,16 @@
 @lds.missing.align.0 = internal unnamed_addr addrspace(3) global [39 x i32] undef
 @lds.missing.align.1 = internal unnamed_addr addrspace(3) global [7 x i64] undef
 
-declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1) #0
-declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #0
+declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #0
 
 
 ; HSA-LABEL: {{^}}test_no_round_size_1:
 ; HSA: workgroup_group_segment_byte_size = 38
 define amdgpu_kernel void @test_no_round_size_1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
   %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false)
   ret void
 }
 
@@ -36,12 +36,12 @@ define amdgpu_kernel void @test_no_round_size_1(i8 addrspace(1)* %out, i8 addrsp
 ; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_size_2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false) %lds.align16.1.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.1 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.1.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.1.bc, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.1.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.1.bc, i32 38, i1 false) ret void } @@ -52,12 +52,12 @@ define amdgpu_kernel void @test_round_size_2(i8 addrspace(1)* %out, i8 addrspace ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_2_align_8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) ret void } @@ -67,11 +67,11 @@ define amdgpu_kernel void @test_round_size_2_align_8(i8 addrspace(1)* %out, i8 a ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_local_lds_and_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* %lds.arg) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 
38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.arg, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.arg, i32 38, i1 false) ret void } @@ -79,8 +79,8 @@ define amdgpu_kernel void @test_round_local_lds_and_arg(i8 addrspace(1)* %out, i ; HSA: workgroup_group_segment_byte_size = 0 ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_lds_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* %lds.arg) #1 { - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.arg, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.arg, i32 38, i1 false) ret void } @@ -89,8 +89,8 @@ define amdgpu_kernel void @test_round_lds_arg(i8 addrspace(1)* %out, i8 addrspac ; HSA: workgroup_group_segment_byte_size = 0 ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_high_align_lds_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* align 64 %lds.arg) #1 { - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 64, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 64, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 64 %lds.arg, i8 addrspace(1)* align 64 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 64 %out, i8 addrspace(3)* align 64 %lds.arg, i32 38, i1 false) ret void } @@ -100,12 +100,12 @@ define amdgpu_kernel void @test_high_align_lds_arg(i8 addrspace(1)* %out, i8 add ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_missing_alignment_size_2_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.0.bc, i8 addrspace(1)* %in, i32 160, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.0.bc, i32 160, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i8 addrspace(1)* align 4 %in, i32 160, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i32 160, i1 false) %lds.missing.align.1.bc = bitcast [7 x i64] addrspace(3)* @lds.missing.align.1 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.1.bc, i8 addrspace(1)* %in, i32 56, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.1.bc, i32 56, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i8 addrspace(1)* align 8 %in, i32 56, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* 
align 8 %out, i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i32 56, i1 false) ret void } @@ -116,12 +116,12 @@ define amdgpu_kernel void @test_missing_alignment_size_2_order0(i8 addrspace(1)* ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_missing_alignment_size_2_order1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.missing.align.1.bc = bitcast [7 x i64] addrspace(3)* @lds.missing.align.1 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.1.bc, i8 addrspace(1)* %in, i32 56, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.1.bc, i32 56, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i8 addrspace(1)* align 8 %in, i32 56, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i32 56, i1 false) %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.0.bc, i8 addrspace(1)* %in, i32 160, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.0.bc, i32 160, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i8 addrspace(1)* align 4 %in, i32 160, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i32 160, i1 false) ret void } @@ -144,16 +144,16 @@ define amdgpu_kernel void @test_missing_alignment_size_2_order1(i8 addrspace(1)* ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 
addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) ret void } @@ -165,16 +165,16 @@ define amdgpu_kernel void @test_round_size_3_order0(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) ret void } @@ -186,16 +186,16 @@ define amdgpu_kernel void @test_round_size_3_order1(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 
false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) ret void } @@ -207,16 +207,16 @@ define amdgpu_kernel void @test_round_size_3_order2(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order3(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) ret void } @@ -228,16 +228,16 @@ define amdgpu_kernel void @test_round_size_3_order3(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order4(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - 
call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) ret void } @@ -249,16 +249,16 @@ define amdgpu_kernel void @test_round_size_3_order4(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order5(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll index 4068c020e70..77eb4900ea5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga 
-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i1) nounwind ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1: @@ -83,7 +83,7 @@ declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i1 false) nounwind ret void } @@ -128,7 +128,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %bcout, i8 addrspace(3)* align 2 %bcin, i32 32, i1 false) nounwind ret void } @@ -147,7 +147,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 4 %bcout, i8 addrspace(3)* align 4 %bcin, i32 32, i1 false) nounwind ret void } @@ -164,7 +164,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 8 %bcout, i8 addrspace(3)* align 8 %bcin, i32 32, i1 false) nounwind ret void } @@ -241,7 +241,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 
addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i1 false) nounwind ret void } @@ -284,7 +284,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 add define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %bcout, i8 addrspace(1)* align 2 %bcin, i64 32, i1 false) nounwind ret void } @@ -297,7 +297,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 add define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %bcout, i8 addrspace(1)* align 4 %bcin, i64 32, i1 false) nounwind ret void } @@ -310,7 +310,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 add define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %bcout, i8 addrspace(1)* align 8 %bcin, i64 32, i1 false) nounwind ret void } @@ -323,7 +323,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 add define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 16 %bcout, i8 addrspace(1)* align 16 %bcin, i64 32, i1 false) nounwind ret void } @@ -342,7 +342,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad ; SI-DAG: buffer_store_dwordx4 define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind { %str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 4, i1 false) + call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(2)* align 4 %str, i64 32, i1 false) ret void } @@ -367,6 +367,6 @@ define amdgpu_kernel void 
@test_memcpy_const_string_align4(i8 addrspace(1)* noal ; SI: buffer_store_byte define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind { %str = bitcast [16 x i8] addrspace(2)* @hello.align1 to i8 addrspace(2)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll index 778467207a0..498a65dc0a6 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll @@ -1,16 +1,16 @@ ; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1 -declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1 -declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1 -declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i32, i1) #1 +declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) #1 ; Test the upper bound for sizes to leave ; OPT-LABEL: @max_size_small_static_memcpy_caller0( -; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) +; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) ret void } @@ -26,14 +26,14 @@ define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* ; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025 ; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } ; OPT-LABEL: @max_size_small_static_memmove_caller0( -; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) +; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, 
i64 1024, i1 false) ret void } @@ -44,14 +44,14 @@ define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1) ; OPT: getelementptr ; OPT-NEXT: store i8 define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } ; OPT-LABEL: @max_size_small_static_memset_caller0( -; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false) +; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false) define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { - call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false) + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false) ret void } @@ -60,7 +60,7 @@ define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* ; OPT: getelementptr ; OPT: store i8 define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { - call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i32 1, i1 false) + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i1 false) ret void } @@ -68,7 +68,7 @@ define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* ; OPT-NOT: call ; OPT: phi define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false) ret void } @@ -76,7 +76,7 @@ define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 add ; OPT-NOT: call ; OPT: phi define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false) ret void } @@ -87,8 +87,8 @@ define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 add ; OPT: phi ; OPT-NOT: call define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i1 false) ret void } @@ -99,7 +99,7 @@ define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, ; OPT: getelementptr inbounds i8, i8 addrspace(1)* ; OPT: store i8 define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 { - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* 
%dst, i8 addrspace(3)* %src, i32 %n, i1 false) ret void } @@ -110,10 +110,10 @@ define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3 ; OPT: getelementptr inbounds i8, i8 addrspace(1)* ; OPT: store i8 -; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false) +; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false) define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll index 7343dd6bbda..fcf64ce8016 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll @@ -1,52 +1,52 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s -declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 -declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0 +declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0 -declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 -declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0 +declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) #1 ; CHECK-LABEL: @promote_with_memcpy( ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}} -; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false) +; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) +; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false) define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %alloca = alloca [17 x i32], align 4 %alloca.bc = bitcast [17 x i32]* %alloca to i8* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 
false) - call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false) ret void } ; CHECK-LABEL: @promote_with_memmove( ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memmove.alloca, i32 0, i32 %{{[0-9]+}} -; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) -; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false) +; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) +; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false) define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %alloca = alloca [17 x i32], align 4 %alloca.bc = bitcast [17 x i32]* %alloca to i8* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memmove.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) - call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false) + call void @llvm.memmove.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) + call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false) ret void } ; CHECK-LABEL: @promote_with_memset( ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}} -; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %alloca.bc, i8 7, i32 68, i32 4, i1 false) +; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 7, i32 68, i1 false) define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %alloca = alloca [17 x i32], align 4 %alloca.bc = bitcast [17 x i32]* %alloca to i8* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memset.p0i8.i32(i8* %alloca.bc, i8 7, i32 68, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 %alloca.bc, i8 7, i32 68, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll index 45a399b058c..322e5ca6219 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll @@ -1,7 +1,7 @@ ; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #1 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #1 ; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit ; GCN: ; ScratchSize: 4294967296 @@ -9,6 +9,6 @@ define amdgpu_kernel void @stack_size_limit() #0 { entry: %alloca = alloca [1073741823 x i32], align 4 %bc = bitcast [1073741823 x i32]* %alloca to i8* - call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 
1073741823, i32 1, i1 true) + call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i1 true) ret void } diff --git a/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll index 567400318ee..62a9aa23f29 100644 --- a/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll +++ b/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll @@ -59,7 +59,7 @@ bb3: ; preds = %entry %34 = fadd double %31, 0.000000e+00 %35 = fadd double %32, 0.000000e+00 %36 = bitcast %struct.ggPoint3* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 null, i8* align 4 %36, i32 24, i1 false) store double %33, double* null, align 8 br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i @@ -76,4 +76,4 @@ bb7: ; preds = %entry ret i32 0 } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind diff --git a/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll index c447a1f25b6..30a388bb587 100644 --- a/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll +++ b/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll @@ -16,7 +16,7 @@ bb: ; preds = %entry bb1: ; preds = %entry %0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind - call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 undef, i8 0, i32 40, i1 false) %1 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 0 store %struct.mo* undef, %struct.mo** %1, align 4 %2 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 5 @@ -40,7 +40,7 @@ bb6: ; preds = %bb3 declare %struct.ui* @vn_pp_to_ui(i32*) -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind declare i32 @mo_create_nnm(%struct.mo*, i64, i32**) diff --git a/llvm/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/llvm/test/CodeGen/ARM/2011-10-26-memset-inline.ll index c3b7c4ea86c..8d6ce34c26d 100644 --- a/llvm/test/CodeGen/ARM/2011-10-26-memset-inline.ll +++ b/llvm/test/CodeGen/ARM/2011-10-26-memset-inline.ll @@ -14,8 +14,8 @@ target triple = "thumbv7-apple-ios5.0.0" ; CHECK-UNALIGNED: str define void @foo(i8* nocapture %c) nounwind optsize { entry: - call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/llvm/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll index c8e08c22ab1..7024a653b6c 100644 --- a/llvm/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll +++ b/llvm/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -5,8 +5,8 @@ ; CHECK: vst1.64 define void @f_0_40(i8* nocapture %c) nounwind optsize { entry: - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 16 %c, i8 0, i64 40, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll 
b/llvm/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll index ce0dcc70952..ef33b2f5018 100644 --- a/llvm/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll +++ b/llvm/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll @@ -19,7 +19,7 @@ declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone declare i8* @__cxa_begin_catch(i8*) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind declare void @__cxa_end_catch() diff --git a/llvm/test/CodeGen/ARM/Windows/memset.ll b/llvm/test/CodeGen/ARM/Windows/memset.ll index 500e25e259c..c9b22f47a15 100644 --- a/llvm/test/CodeGen/ARM/Windows/memset.ll +++ b/llvm/test/CodeGen/ARM/Windows/memset.ll @@ -2,11 +2,11 @@ @source = common global [512 x i8] zeroinitializer, align 4 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define void @function() { entry: - call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i1 false) unreachable } diff --git a/llvm/test/CodeGen/ARM/Windows/no-aeabi.ll b/llvm/test/CodeGen/ARM/Windows/no-aeabi.ll index a4103b0a676..a5f7fc8daf6 100644 --- a/llvm/test/CodeGen/ARM/Windows/no-aeabi.ll +++ b/llvm/test/CodeGen/ARM/Windows/no-aeabi.ll @@ -1,14 +1,14 @@ ; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -verify-machineinstrs -o - %s | FileCheck %s -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind @source = common global [512 x i8] zeroinitializer, align 4 @target = common global [512 x i8] zeroinitializer, align 4 define void @move() nounwind { entry: - call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i1 false) unreachable } @@ -16,7 +16,7 @@ entry: define void @copy() nounwind { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i1 false) unreachable } diff --git a/llvm/test/CodeGen/ARM/arm-eabi.ll b/llvm/test/CodeGen/ARM/arm-eabi.ll index 898055dd109..c2f364ab92b 100644 --- a/llvm/test/CodeGen/ARM/arm-eabi.ll +++ b/llvm/test/CodeGen/ARM/arm-eabi.ll @@ -39,7 +39,7 @@ define void @foo(i32* %t) { %4 = bitcast %struct.my_s* %3 to i8* ; CHECK-EABI: bl __aeabi_memcpy ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %4, i8* inttoptr (i32 1 to i8*), i32 72, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %4, i8* align 4 inttoptr (i32 1 to i8*), i32 72, i1 false) ret void } @@ -50,22 +50,22 @@ entry: ; memmove ; CHECK-EABI: bl __aeabi_memmove ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 
false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; memcpy ; CHECK-EABI: bl __aeabi_memcpy ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; memset ; CHECK-EABI: mov r2, #1 ; CHECK-EABI: bl __aeabi_memset ; CHECK-GNUEABI: mov r1, #1 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind diff --git a/llvm/test/CodeGen/ARM/constantpool-promote-ldrh.ll b/llvm/test/CodeGen/ARM/constantpool-promote-ldrh.ll index 59970495874..0767d729a0a 100644 --- a/llvm/test/CodeGen/ARM/constantpool-promote-ldrh.ll +++ b/llvm/test/CodeGen/ARM/constantpool-promote-ldrh.ll @@ -12,10 +12,10 @@ target triple = "thumbv6m-arm-linux-gnueabi" ; CHECK: ldrh r{{[0-9]+}}, {{\[}}[[base]]] define hidden i32 @fn1() #0 { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 undef, i8* align 2 bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i1 false) ret i32 undef } ; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) attributes #0 = { "target-features"="+strict-align" } diff --git a/llvm/test/CodeGen/ARM/constantpool-promote.ll b/llvm/test/CodeGen/ARM/constantpool-promote.ll index d5361f33a98..ccd86257dd3 100644 --- a/llvm/test/CodeGen/ARM/constantpool-promote.ll +++ b/llvm/test/CodeGen/ARM/constantpool-promote.ll @@ -120,7 +120,7 @@ define void @fn1() "target-features"="+strict-align" { entry: %a = alloca [4 x i16], align 2 %0 = bitcast [4 x i16]* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %0, i8* align 2 bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i1 false) ret void } @@ -128,7 +128,7 @@ define void @fn2() "target-features"="+strict-align" { entry: %a = alloca [8 x i8], align 2 %0 = bitcast [8 x i8]* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([8 x i8]* @fn2.a to i8*), i32 16, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([8 x i8]* @fn2.a to i8*), i32 16, i1 false) ret void } @@ -156,7 +156,7 @@ define void @pr32130() #0 { ; CHECK-V7: [[x]]: ; CHECK-V7: .asciz "s\000\000" define void @test10(i8* %a) local_unnamed_addr #0 { - call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i32 1, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i32 1, i1 false) ret void } @@ -174,16 
+174,16 @@ define void @test10(i8* %a) local_unnamed_addr #0 { ; CHECK-V7ARM: .short 3 ; CHECK-V7ARM: .short 4 define void @test11(i16* %a) local_unnamed_addr #0 { - call void @llvm.memmove.p0i16.p0i16.i32(i16* %a, i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0), i32 2, i32 2, i1 false) + call void @llvm.memmove.p0i16.p0i16.i32(i16* align 2 %a, i16* align 2 getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0), i32 2, i1 false) ret void } declare void @b(i8*) #1 declare void @c(i16*) #1 -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) -declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) local_unnamed_addr -declare void @llvm.memmove.p0i16.p0i16.i32(i16*, i16*, i32, i32, i1) local_unnamed_addr +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1) local_unnamed_addr +declare void @llvm.memmove.p0i16.p0i16.i32(i16*, i16*, i32, i1) local_unnamed_addr attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/ARM/crash-O0.ll b/llvm/test/CodeGen/ARM/crash-O0.ll index f92af999be5..bfbab8a9933 100644 --- a/llvm/test/CodeGen/ARM/crash-O0.ll +++ b/llvm/test/CodeGen/ARM/crash-O0.ll @@ -12,7 +12,7 @@ entry: } @.str523 = private constant [256 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4 ; <[256 x i8]*> [#uses=1] -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind ; This function uses the scavenger for an ADDri instruction. ; ARMBaseRegisterInfo::estimateRSStackSizeLimit must return a 255 limit. 
@@ -21,8 +21,8 @@ entry:
   %letter = alloca i8                             ; [#uses=0]
   %prodvers = alloca [256 x i8]                   ; <[256 x i8]*> [#uses=1]
   %buildver = alloca [256 x i8]                   ; <[256 x i8]*> [#uses=0]
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i1 false)
   %prodvers2 = bitcast [256 x i8]* %prodvers to i8*  ; [#uses=1]
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %prodvers2, i8* align 1 getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i1 false)
   unreachable
 }
diff --git a/llvm/test/CodeGen/ARM/debug-info-blocks.ll b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
index ba265cc3547..cc1a45f23da 100644
--- a/llvm/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
@@ -35,7 +35,7 @@ declare i8* @objc_msgSend(i8*, i8*, ...)

 declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone

-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind

 define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp !dbg !23 {
   %1 = alloca %0*, align 4
@@ -77,7 +77,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
   %24 = bitcast i8* %23 to %struct.CR*, !dbg !143
   %25 = bitcast %struct.CR* %24 to i8*, !dbg !143
   %26 = bitcast %struct.CR* %data to i8*, !dbg !143
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 16, i1 false), !dbg !143
   %27 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !144
   %28 = load %3*, %3** %27, align 4, !dbg !144
   %29 = load i32, i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144
@@ -86,7 +86,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
   %32 = bitcast i8* %31 to %struct.CR*, !dbg !144
   %33 = bitcast %struct.CR* %32 to i8*, !dbg !144
   %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %33, i8* align 4 %34, i32 16, i1 false), !dbg !144
   %35 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !145
   %36 = load %3*, %3** %35, align 4, !dbg !145
   %37 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !145
diff --git a/llvm/test/CodeGen/ARM/dyn-stackalloc.ll b/llvm/test/CodeGen/ARM/dyn-stackalloc.ll
index 5b963fd64de..b653acbd6a7 100644
--- a/llvm/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/llvm/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -51,7 +51,7 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
   %tmp9 = call i8* @strcpy(i8* %tmp6, i8* %tag)
   %tmp6.len = call i32 @strlen(i8* %tmp6)
   %tmp6.indexed = getelementptr i8, i8* %tmp6, i32 %tmp6.len
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp6.indexed, i8* align 1 getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i1 false)
   %tmp15 = call i8* @strcat(i8* %tmp6, i8* %contents)
   call fastcc void @comment_add(%struct.comment* %vc, i8* %tmp6)
   ret void
@@ -65,4 +65,4 @@ declare fastcc void @comment_add(%struct.comment*, i8*)

 declare i8* @strcpy(i8*, i8*)

-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll b/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
index 277461aa566..8d9c27b6f22 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -44,11 +44,11 @@ define void @t1() nounwind ssp {
 ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
 ; THUMB-LONG: ldr r3, [r3]
 ; THUMB-LONG: blx r3
-  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i1 false)
   ret void
 }

-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind

 define void @t2() nounwind ssp {
 ; ARM-LABEL: t2:
@@ -93,11 +93,11 @@ define void @t2() nounwind ssp {
 ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
 ; THUMB-LONG: ldr r3, [r3]
 ; THUMB-LONG: blx r3
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 17, i1 false)
   ret void
 }

-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind

 define void @t3() nounwind ssp {
 ; ARM-LABEL: t3:
@@ -141,7 +141,7 @@ define void @t3() nounwind ssp {
 ; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
 ; THUMB-LONG: ldr r3, [r3]
 ; THUMB-LONG: blx r3
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
   ret void
 }

@@ -173,11 +173,11 @@ define void @t4() nounwind ssp {
 ; THUMB: ldrh r1, [r0, #24]
 ; THUMB: strh r1, [r0, #12]
 ; THUMB: bx lr
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
   ret void
 }

-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 define void @t5() nounwind ssp {
 ; ARM-LABEL: t5:
@@ -215,7 +215,7 @@ define void @t5() nounwind ssp {
 ; THUMB: ldrh r1, [r0, #24]
 ; THUMB: strh r1, [r0, #12]
 ; THUMB: bx lr
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
   ret void
 }

@@ -275,14 +275,14 @@ define void @t6() nounwind ssp {
 ; THUMB: ldrb r1, [r0, #25]
 ; THUMB: strb r1, [r0, #13]
 ; THUMB: bx lr
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
   ret void
 }

 ; rdar://13202135
 define void @t7() nounwind ssp {
 ; Just make sure this doesn't assert when we have an odd length and an alignment of 2.
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i1 false)
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/interval-update-remat.ll b/llvm/test/CodeGen/ARM/interval-update-remat.ll
index 524e8a0aa49..216f7e915a8 100644
--- a/llvm/test/CodeGen/ARM/interval-update-remat.ll
+++ b/llvm/test/CodeGen/ARM/interval-update-remat.ll
@@ -85,7 +85,7 @@ _ZN7MessageD1Ev.exit33:                           ; preds = %delete.notnull.i.i.
 if.end:                                           ; preds = %_ZN7MessageD1Ev.exit33, %entry
   %message_.i.i = getelementptr inbounds %class.AssertionResult.24.249.299.1324.2349, %class.AssertionResult.24.249.299.1324.2349* %gtest_ar, i32 0, i32 1
   %call.i.i.i = call %class.scoped_ptr.23.248.298.1323.2348* @_ZN10scoped_ptrI25Trans_NS___1_basic_stringIciiEED2Ev(%class.scoped_ptr.23.248.298.1323.2348* %message_.i.i)
-  call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 12, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 4 null, i8 0, i32 12, i1 false)
   call void @_ZN25Trans_NS___1_basic_stringIciiE5m_fn2Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343* nonnull %ref.tmp)
   call void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* nonnull sret %agg.tmp16)
   %callback_.i = getelementptr inbounds %class.TestCompletionCallback.9.234.284.1309.2334, %class.TestCompletionCallback.9.234.284.1309.2334* %callback, i32 0, i32 1
@@ -137,7 +137,7 @@ declare void @_ZN18ClientSocketHandle5m_fn3IPiEEvRK25Trans_NS___1_basic_stringIc
 declare void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* sret)

 ; Function Attrs: argmemonly nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0

 declare %class.BoundNetLog.20.245.295.1320.2345* @_ZN11BoundNetLogD1Ev(%class.BoundNetLog.20.245.295.1320.2345* returned) unnamed_addr
diff --git a/llvm/test/CodeGen/ARM/ldm-stm-base-materialization.ll b/llvm/test/CodeGen/ARM/ldm-stm-base-materialization.ll
index a3231f95f47..755619e8b3e 100644
--- a/llvm/test/CodeGen/ARM/ldm-stm-base-materialization.ll
+++ b/llvm/test/CodeGen/ARM/ldm-stm-base-materialization.ll
@@ -22,7 +22,7 @@ entry:
   %2 = load i32*, i32** @b, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
   %3 = bitcast i32* %arrayidx1 to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 24, i1 false)
   ret void
 }

@@ -43,7 +43,7 @@ entry:
   %2 = load i32*, i32** @b, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
   %3 = bitcast i32* %arrayidx1 to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 28, i1 false)
   ret void
 }

@@ -64,7 +64,7 @@ entry:
   %2 = load i32*, i32** @b, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
   %3 = bitcast i32* %arrayidx1 to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 32, i1 false)
   ret void
 }

@@ -85,9 +85,9 @@ entry:
   %2 = load i32*, i32** @b, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
   %3 = bitcast i32* %arrayidx1 to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 36, i1 false)
   ret void
 }

 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1
diff --git a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll
index 611cba6ed1f..10e56a346a2 100644
--- a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -37,14 +37,14 @@ entry:

 for.body.lr.ph:                                   ; preds = %entry
   %1 = icmp sgt i32 %0, 1
   %smax = select i1 %1, i32 %0, i32 1
-  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i1 false)
   unreachable

 for.cond1.preheader:                              ; preds = %entry
   ret void
 }

-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind

 ; rdar://12462006
 define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
diff --git a/llvm/test/CodeGen/ARM/memcpy-inline.ll b/llvm/test/CodeGen/ARM/memcpy-inline.ll
index b447497b270..1dccf0b9905 100644
--- a/llvm/test/CodeGen/ARM/memcpy-inline.ll
+++ b/llvm/test/CodeGen/ARM/memcpy-inline.ll
@@ -23,7 +23,7 @@ entry:
 ; CHECK-T1: strb [[TREG1]],
 ; CHECK-T1: ldrh [[TREG2:r[0-9]]],
 ; CHECK-T1: strh [[TREG2]]
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false)
   ret i32 0
 }

@@ -37,7 +37,7 @@ entry:
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
 ; CHECK-T1-LABEL: t1:
 ; CHECK-T1: bl _memcpy
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false)
   ret void
 }

@@ -55,7 +55,7 @@ entry:
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3]
 ; CHECK-T1-LABEL: t2:
 ; CHECK-T1: bl _memcpy
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false)
   ret void
 }

@@ -68,7 +68,7 @@ entry:
 ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]
 ; CHECK-T1-LABEL: t3:
 ; CHECK-T1: bl _memcpy
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false)
   ret void
 }

@@ -80,7 +80,7 @@ entry:
 ; CHECK: strh [[REG5:r[0-9]+]], [r0]
 ; CHECK-T1-LABEL: t4:
 ; CHECK-T1: bl _memcpy
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false)
   ret void
 }

@@ -96,7 +96,7 @@ entry:
 ; CHECK: str [[REG7]]
 ; CHECK-T1-LABEL: t5:
 ; CHECK-T1: bl _memcpy
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false)
   ret void
 }

@@ -114,7 +114,7 @@ entry:
 ; CHECK-T1: strh [[TREG5]],
 ; CHECK-T1: ldr [[TREG6:r[0-9]]],
 ; CHECK-T1: str [[TREG6]]
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false)
   ret void
 }

@@ -130,9 +130,9 @@ entry:
 ; CHECK-T1: str
   %0 = bitcast %struct.Foo* %a to i8*
   %1 = bitcast %struct.Foo* %b to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false)
   ret void
 }

-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll b/llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll
index 2ebe7ed5b14..314f559e357 100644
--- a/llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll
+++ b/llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll
@@ -24,7 +24,7 @@ entry:
 ; Think of the monstrosity '{{\[}}[[LB]]]' as '[ [[LB]] ]' without the spaces.
 ; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
 ; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 17, i1 false)
   ret void
 }

@@ -42,7 +42,7 @@ entry:
 ; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
 ; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
 ; CHECK-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 15, i1 false)
   ret void
 }

@@ -54,13 +54,13 @@ entry:

 define void @t3() {
   call void @llvm.memcpy.p0i8.p0i8.i32(
-      i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0),
-      i8* getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0),
-      i32 24, i32 8, i1 false)
+      i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0),
+      i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0),
+      i32 24, i1 false)
   call void @llvm.memcpy.p0i8.p0i8.i32(
-      i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0),
-      i8* getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0),
-      i32 24, i32 8, i1 false)
+      i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0),
+      i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0),
+      i32 24, i1 false)
   ret void
 }

@@ -70,7 +70,7 @@ define void @t3() {
 define void @test3(%struct.S* %d, %struct.S* %s) #0 {
   %1 = bitcast %struct.S* %d to i8*
   %2 = bitcast %struct.S* %s to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 48, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %2, i32 48, i1 false)
 ; 3 ldm/stm pairs in v6; 2 in v7
 ; CHECK: ldm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1:{.*}]]
 ; CHECK: stm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1]]
@@ -91,4 +91,4 @@ declare void @g(i32*)
 attributes #0 = { "no-frame-pointer-elim"="true" }

 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1
diff --git a/llvm/test/CodeGen/ARM/memcpy-no-inline.ll b/llvm/test/CodeGen/ARM/memcpy-no-inline.ll
index 126546095e1..7aaac19eee3 100644
--- a/llvm/test/CodeGen/ARM/memcpy-no-inline.ll
+++ b/llvm/test/CodeGen/ARM/memcpy-no-inline.ll
@@ -14,7 +14,7 @@ entry:
 ; CHECK-NOT: ldm
   %mystring = alloca [31 x i8], align 1
   %0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i1 false)
   ret void
 }

@@ -24,10 +24,10 @@ entry:
 ; CHECK-NOT: __aeabi_memcpy
   %mystring = alloca [31 x i8], align 1
   %0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i32 0, i32 0), i32 21, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i32 0, i32 0), i32 21, i1 false)
   ret void
 }

-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1

 attributes #0 = { minsize noinline nounwind optsize }
diff --git a/llvm/test/CodeGen/ARM/memfunc.ll b/llvm/test/CodeGen/ARM/memfunc.ll
index ed6746290b7..882091b67f0 100644
--- a/llvm/test/CodeGen/ARM/memfunc.ll
+++ b/llvm/test/CodeGen/ARM/memfunc.ll
@@ -16,13 +16,13 @@ entry:
 ; CHECK-DARWIN: bl _memmove
 ; CHECK-EABI: bl __aeabi_memmove
 ; CHECK-GNUEABI: bl memmove
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false)

 ; CHECK-IOS: bl _memcpy
 ; CHECK-DARWIN: bl _memcpy
 ; CHECK-EABI: bl __aeabi_memcpy
 ; CHECK-GNUEABI: bl memcpy
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false)

 ; EABI memset swaps arguments
 ; CHECK-IOS: mov r1, #1
@@ -33,7 +33,7 @@ entry:
 ; CHECK-EABI: bl __aeabi_memset
 ; CHECK-GNUEABI: mov r1, #1
 ; CHECK-GNUEABI: bl memset
-  call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i1 false)

 ; EABI uses memclr if value set to 0
 ; CHECK-IOS: mov r1, #0
@@ -42,7 +42,7 @@ entry:
 ; CHECK-DARWIN: bl _memset
 ; CHECK-EABI: bl __aeabi_memclr
 ; CHECK-GNUEABI: bl memset
-  call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i1 false)

 ; EABI uses aligned function variants if possible
@@ -50,49 +50,49 @@ entry:
 ; CHECK-DARWIN: bl _memmove
 ; CHECK-EABI: bl __aeabi_memmove4
 ; CHECK-GNUEABI: bl memmove
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 500, i1 false)

 ; CHECK-IOS: bl _memcpy
 ; CHECK-DARWIN: bl _memcpy
 ; CHECK-EABI: bl __aeabi_memcpy4
 ; CHECK-GNUEABI: bl memcpy
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 500, i1 false)

 ; CHECK-IOS: bl _memset
 ; CHECK-DARWIN: bl _memset
 ; CHECK-EABI: bl __aeabi_memset4
 ; CHECK-GNUEABI: bl memset
-  call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 4 %dest, i8 1, i32 500, i1 false)

 ; CHECK-IOS: bl _memset
 ; CHECK-DARWIN: bl _memset
 ; CHECK-EABI: bl __aeabi_memclr4
 ; CHECK-GNUEABI: bl memset
-  call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 4 %dest, i8 0, i32 500, i1 false)

 ; CHECK-IOS: bl _memmove
 ; CHECK-DARWIN: bl _memmove
 ; CHECK-EABI: bl __aeabi_memmove8
 ; CHECK-GNUEABI: bl memmove
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false)

 ; CHECK-IOS: bl _memcpy
 ; CHECK-DARWIN: bl _memcpy
 ; CHECK-EABI: bl __aeabi_memcpy8
 ; CHECK-GNUEABI: bl memcpy
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false)

 ; CHECK-IOS: bl _memset
 ; CHECK-DARWIN: bl _memset
 ; CHECK-EABI: bl __aeabi_memset8
 ; CHECK-GNUEABI: bl memset
-  call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 8 %dest, i8 1, i32 500, i1 false)

 ; CHECK-IOS: bl _memset
 ; CHECK-DARWIN: bl _memset
 ; CHECK-EABI: bl __aeabi_memclr8
 ; CHECK-GNUEABI: bl memset
-  call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 8 %dest, i8 0, i32 500, i1 false)

   unreachable
 }

@@ -113,7 +113,7 @@ entry:
 ; CHECK-GNUEABI: bl memmove
   %arr0 = alloca [9 x i8], align 1
   %0 = bitcast [9 x i8]* %arr0 to i8*
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false)

 ; CHECK: add r1, sp, #16
 ; CHECK-IOS: bl _memcpy
@@ -122,7 +122,7 @@ entry:
 ; CHECK-GNUEABI: bl memcpy
   %arr1 = alloca [9 x i8], align 1
   %1 = bitcast [9 x i8]* %arr1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false)

 ; CHECK-IOS: mov r0, sp
 ; CHECK-IOS: mov r1, #1
@@ -138,7 +138,7 @@ entry:
 ; CHECK-GNUEABI: bl memset
   %arr2 = alloca [9 x i8], align 1
   %2 = bitcast [9 x i8]* %arr2 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false)

   unreachable
 }

@@ -155,7 +155,7 @@ entry:
 ; CHECK-GNUEABI: bl memmove
   %arr0 = alloca [7 x i8], align 1
   %0 = bitcast [7 x i8]* %arr0 to i8*
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r1, sp, #10|sub(.w)? r1, r(7|11), #22}}
 ; CHECK-IOS: bl _memcpy
@@ -164,7 +164,7 @@ entry:
 ; CHECK-GNUEABI: bl memcpy
   %arr1 = alloca [7 x i8], align 1
   %1 = bitcast [7 x i8]* %arr1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r0, sp, #3|sub(.w)? r0, r(7|11), #29}}
 ; CHECK-IOS: mov r1, #1
@@ -177,7 +177,7 @@ entry:
 ; CHECK-GNUEABI: bl memset
   %arr2 = alloca [7 x i8], align 1
   %2 = bitcast [7 x i8]* %arr2 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false)

   unreachable
 }

@@ -194,7 +194,7 @@ entry:
 ; CHECK-GNUEABI: bl memmove
   %arr0 = alloca [9 x i8], align 1
   %0 = getelementptr inbounds [9 x i8], [9 x i8]* %arr0, i32 0, i32 4
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w) r., r(7|11), #26}}
 ; CHECK-IOS: bl _memcpy
@@ -203,7 +203,7 @@ entry:
 ; CHECK-GNUEABI: bl memcpy
   %arr1 = alloca [9 x i8], align 1
   %1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 4
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w) r., r(7|11), #35}}
 ; CHECK-IOS: mov r1, #1
@@ -216,7 +216,7 @@ entry:
 ; CHECK-GNUEABI: bl memset
   %arr2 = alloca [9 x i8], align 1
   %2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4
-  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false)

   unreachable
 }

@@ -233,7 +233,7 @@ entry:
 ; CHECK-GNUEABI: bl memmove
   %arr0 = alloca [13 x i8], align 1
   %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 1
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}}
 ; CHECK-IOS: bl _memcpy
@@ -242,7 +242,7 @@ entry:
 ; CHECK-GNUEABI: bl memcpy
   %arr1 = alloca [13 x i8], align 1
   %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 1
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #47}}
 ; CHECK-IOS: mov r1, #1
@@ -255,7 +255,7 @@ entry:
 ; CHECK-GNUEABI: bl memset
   %arr2 = alloca [13 x i8], align 1
   %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1
-  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false)

   unreachable
 }

@@ -272,7 +272,7 @@ entry:
 ; CHECK-GNUEABI: bl memmove
   %arr0 = alloca [13 x i8], align 1
   %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 %i
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #42}}
 ; CHECK-IOS: bl _memcpy
@@ -281,7 +281,7 @@ entry:
 ; CHECK-GNUEABI: bl memcpy
   %arr1 = alloca [13 x i8], align 1
   %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 %i
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #55}}
 ; CHECK-IOS: mov r1, #1
@@ -294,7 +294,7 @@ entry:
 ; CHECK-GNUEABI: bl memset
   %arr2 = alloca [13 x i8], align 1
   %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i
-  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false)

   unreachable
 }

@@ -311,7 +311,7 @@ entry:
 ; CHECK-GNUEABI: bl memmove
   %arr0 = alloca [13 x i8], align 1
   %0 = getelementptr [13 x i8], [13 x i8]* %arr0, i32 0, i32 4
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}}
 ; CHECK-IOS: bl _memcpy
@@ -320,7 +320,7 @@ entry:
 ; CHECK-GNUEABI: bl memcpy
   %arr1 = alloca [13 x i8], align 1
   %1 = getelementptr [13 x i8], [13 x i8]* %arr1, i32 0, i32 4
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #47}}
 ; CHECK-IOS: mov r1, #1
@@ -333,7 +333,7 @@ entry:
 ; CHECK-GNUEABI: bl memset
   %arr2 = alloca [13 x i8], align 1
   %2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4
-  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false)

   unreachable
 }

@@ -350,7 +350,7 @@ entry:
 ; CHECK-GNUEABI: bl memmove
   %arr0 = alloca [13 x i8], align 1
   %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 16
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}}
 ; CHECK-IOS: bl _memcpy
@@ -359,7 +359,7 @@ entry:
 ; CHECK-GNUEABI: bl memcpy
   %arr1 = alloca [13 x i8], align 1
   %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 16
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false)

 ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #47}}
 ; CHECK-IOS: mov r1, #1
@@ -372,7 +372,7 @@ entry:
 ; CHECK-GNUEABI: bl memset
   %arr2 = alloca [13 x i8], align 1
   %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16
-  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false)

   unreachable
 }

@@ -390,15 +390,15 @@ entry:
 @arr9 = weak_odr global [128 x i8] undef
 define void @f9(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" {
 entry:
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr8, i32 0, i32 0), i32 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr9, i32 0, i32 0), i32 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr8, i32 0, i32 0), i32 %n, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr9, i32 0, i32 0), i32 %n, i1 false)
   unreachable
 }

@@ -428,6 +428,6 @@ entry:
 ; CHECK-NOT: arr7:

-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
diff --git a/llvm/test/CodeGen/ARM/memset-inline.ll b/llvm/test/CodeGen/ARM/memset-inline.ll
index b2bd257701d..01b21e9d387 100644
--- a/llvm/test/CodeGen/ARM/memset-inline.ll
+++ b/llvm/test/CodeGen/ARM/memset-inline.ll
@@ -12,7 +12,7 @@ entry:
 ; CHECK-6M: str r1, [r0]
 ; CHECK-6M: str r1, [r0, #4]
 ; CHECK-6M: str r1, [r0, #8]
-  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false)
   ret void
 }

@@ -33,7 +33,7 @@ entry:
 ; CHECK-6M: str [[REG]], [sp]
   %buf = alloca [26 x i8], align 1
   %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i1 false)
   call void @something(i8* %0) nounwind
   ret void
 }
@@ -54,7 +54,7 @@ entry:
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %0 = trunc i32 %i to i8
-  call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i1 false)
   call void @something(i8* %p)
   %inc = add nuw nsw i32 %i, 1
   %exitcond = icmp eq i32 %inc, 255
@@ -78,7 +78,7 @@ entry:
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %0 = trunc i32 %i to i8
-  call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 2 %p, i8 %0, i32 4, i1 false)
   call void @something(i8* %p)
   %inc = add nuw nsw i32 %i, 1
   %exitcond = icmp eq i32 %inc, 255
@@ -89,5 +89,5 @@ for.end:
 }

 declare void @something(i8*) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll b/llvm/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
index 2a7a82da8f6..84bf7ac826e 100644
--- a/llvm/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
+++ b/llvm/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
@@ -15,13 +15,13 @@ define i32 @main() #0 {
 entry:
   %title = alloca [15 x i8], align 1
   %0 = getelementptr inbounds [15 x i8], [15 x i8]* %title, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i1 false)
   %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8* %0) #3
   ret i32 0
 }

 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1

 ; Function Attrs: nounwind optsize
 declare i32 @printf(i8* nocapture readonly, ...) #2
diff --git a/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll b/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll
index b3ed5de857b..c6509cfe9cf 100644
--- a/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll
+++ b/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll
@@ -60,10 +60,10 @@ target triple = "armv7l-unknown-linux-gnueabihf"
 @brefframe = external global [4 x [4 x i8]], align 1

 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) #0

 ; Function Attrs: nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0

 ; Function Attrs: nounwind
 declare void @SetMotionVectorsMB(%structK* nocapture, i32) #1
@@ -122,10 +122,10 @@ for.cond210.preheader:                            ; preds = %if.then169
   unreachable

 if.end230:                                        ; preds = %if.end164
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i32]* @b8mode to i8*), i32 16, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 undef, i8* align 4 bitcast ([4 x i32]* @b8mode to i8*), i32 16, i1 false)
   %b8pdir = getelementptr inbounds %structK, %structK* %2, i32 %1, i32 15
   %3 = bitcast [4 x i32]* %b8pdir to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i1 false)
   br i1 undef, label %if.end236, label %if.then233

 if.then233:                                       ; preds = %if.end230
diff --git a/llvm/test/CodeGen/ARM/tailcall-mem-intrinsics.ll b/llvm/test/CodeGen/ARM/tailcall-mem-intrinsics.ll
index 6744efa8ab8..08370f2bf12 100644
--- a/llvm/test/CodeGen/ARM/tailcall-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/tailcall-mem-intrinsics.ll
@@ -4,7 +4,7 @@
 ; CHECK: bl __aeabi_memcpy
 define i8* @tail_memcpy_ret(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
   ret i8* %p
 }

@@ -12,7 +12,7 @@ entry:
 ; CHECK: bl __aeabi_memmove
 define i8* @tail_memmove_ret(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
 entry:
-  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
   ret i8* %p
 }

@@ -20,12 +20,12 @@ entry:
 ; CHECK: bl __aeabi_memset
 define i8* @tail_memset_ret(i8* nocapture %p, i8 %c, i32 %n) #0 {
 entry:
-  tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false)
   ret i8* %p
 }

-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0

 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AVR/std-ldd-immediate-overflow.ll b/llvm/test/CodeGen/AVR/std-ldd-immediate-overflow.ll
index 290e349c534..5580e3ae973 100644
--- a/llvm/test/CodeGen/AVR/std-ldd-immediate-overflow.ll
+++ b/llvm/test/CodeGen/AVR/std-ldd-immediate-overflow.ll
@@ -8,11 +8,11 @@ define i32 @std_ldd_overflow() {
   store i32 0, i32 *%1
   %2 = bitcast [4 x i8]* %dst to i8*
   %3 = bitcast [4 x i8]* %src to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i16(i8* %2, i8* %3, i16 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i16(i8* %2, i8* %3, i16 4, i1 false)
 ; CHECK-NOT: std {{[XYZ]}}+64, {{r[0-9]+}}
 ; CHECK-NOT: ldd {{r[0-9]+}}, {{[XYZ]}}+64

   ret i32 0
 }

-declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i1)
diff --git a/llvm/test/CodeGen/BPF/byval.ll b/llvm/test/CodeGen/BPF/byval.ll
index 25ba909d9cd..2d2e8d289d6 100644
--- a/llvm/test/CodeGen/BPF/byval.ll
+++ b/llvm/test/CodeGen/BPF/byval.ll
@@ -16,7 +16,7 @@ entry:
   store i32 3, i32* %arrayinit.element2, align 8
   %arrayinit.start = getelementptr inbounds %struct.S, %struct.S* %.compoundliteral, i64 0, i32 0, i64 3
   %scevgep4 = bitcast i32* %arrayinit.start to i8*
-  call void @llvm.memset.p0i8.i64(i8* %scevgep4, i8 0, i64 28, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 4 %scevgep4, i8 0, i64 28, i1 false)
   call void @foo(i32 %a, %struct.S* byval align 8 %.compoundliteral) #3
   ret void
 }
@@ -24,4 +24,4 @@ entry:
 declare void @foo(i32, %struct.S* byval align 8) #1

 ; Function Attrs: nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #3
diff --git a/llvm/test/CodeGen/BPF/ex1.ll b/llvm/test/CodeGen/BPF/ex1.ll
index 97cc7e07ab9..de9599b54d2 100644
--- a/llvm/test/CodeGen/BPF/ex1.ll
+++ b/llvm/test/CodeGen/BPF/ex1.ll
@@ -12,7 +12,7 @@ define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/ne
   %devname = alloca [3 x i8], align 1
   %fmt = alloca [15 x i8], align 1
   %1 = getelementptr inbounds [3 x i8], [3 x i8]* %devname, i64 0, i64 0
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i1 false)
   %2 = getelementptr inbounds %struct.bpf_context, %struct.bpf_context* %ctx, i64 0, i32 0
   %3 = load i64, i64* %2, align 8
   %4 = inttoptr i64 %3 to %struct.sk_buff*
@@ -25,7 +25,7 @@ define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/ne
 ;