diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 179 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/memfunc.ll | 113 | 
3 files changed, 223 insertions, 82 deletions
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 06bde40e037..a59cf985110 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -24,6 +24,114 @@ ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)  ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {  } +// Emit, if possible, a specialized version of the given Libcall. Typically this +// means selecting the appropriately aligned version, but we also convert memset +// of 0 into memclr. +SDValue ARMSelectionDAGInfo:: +EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, +                       SDValue Chain, +                       SDValue Dst, SDValue Src, +                       SDValue Size, unsigned Align, +                       RTLIB::Libcall LC) const { +  const ARMSubtarget &Subtarget = +      DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); +  const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); + +  // Only use a specialized AEABI function if the default version of this +  // Libcall is an AEABI function. +  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) +    return SDValue(); + +  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be +  // able to translate memset to memclr and use the value to index the function +  // name array. +  enum { +    AEABI_MEMCPY = 0, +    AEABI_MEMMOVE, +    AEABI_MEMSET, +    AEABI_MEMCLR +  } AEABILibcall; +  switch (LC) { +  case RTLIB::MEMCPY: +    AEABILibcall = AEABI_MEMCPY; +    break; +  case RTLIB::MEMMOVE: +    AEABILibcall = AEABI_MEMMOVE; +    break; +  case RTLIB::MEMSET:  +    AEABILibcall = AEABI_MEMSET; +    if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) +      if (ConstantSrc->getZExtValue() == 0) +        AEABILibcall = AEABI_MEMCLR; +    break; +  default: +    return SDValue(); +  } + +  // Choose the most-aligned libcall variant that we can +  enum { +    ALIGN1 = 0, +    ALIGN4, +    ALIGN8 +  } AlignVariant; +  if ((Align & 7) == 0) +    AlignVariant = ALIGN8; +  else if ((Align & 3) == 0) +    AlignVariant = ALIGN4; +  else +    AlignVariant = ALIGN1; + +  TargetLowering::ArgListTy Args; +  TargetLowering::ArgListEntry Entry; +  Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext()); +  Entry.Node = Dst; +  Args.push_back(Entry); +  if (AEABILibcall == AEABI_MEMCLR) { +    Entry.Node = Size; +    Args.push_back(Entry); +  } else if (AEABILibcall == AEABI_MEMSET) { +    // Adjust parameters for memset, EABI uses format (ptr, size, value), +    // GNU library uses (ptr, value, size) +    // See RTABI section 4.3.4 +    Entry.Node = Size; +    Args.push_back(Entry); + +    // Extend or truncate the argument to be an i32 value for the call. +    if (Src.getValueType().bitsGT(MVT::i32)) +      Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); +    else if (Src.getValueType().bitsLT(MVT::i32)) +      Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + +    Entry.Node = Src;  +    Entry.Ty = Type::getInt32Ty(*DAG.getContext()); +    Entry.isSExt = false; +    Args.push_back(Entry); +  } else { +    Entry.Node = Src; +    Args.push_back(Entry); +     +    Entry.Node = Size; +    Args.push_back(Entry); +  } + +  char const *FunctionNames[4][3] = { +    { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  }, +    { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, +    { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  }, +    { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  } +  }; +  TargetLowering::CallLoweringInfo CLI(DAG); +  CLI.setDebugLoc(dl).setChain(Chain) +    .setCallee(TLI->getLibcallCallingConv(LC), +               Type::getVoidTy(*DAG.getContext()), +               DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], +                                     TLI->getPointerTy()), std::move(Args), 0) +    .setDiscardResult(); +  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); +   +  return CallResult.second; +} +  SDValue  ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,                                               SDValue Chain, @@ -42,10 +150,12 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,    // within a subtarget-specific limit.    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);    if (!ConstantSize) -    return SDValue(); +    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, +                                  RTLIB::MEMCPY);    uint64_t SizeVal = ConstantSize->getZExtValue();    if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) -    return SDValue(); +    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, +                                  RTLIB::MEMCPY);    unsigned BytesLeft = SizeVal & 3;    unsigned NumMemOps = SizeVal >> 2; @@ -142,59 +252,26 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,                       makeArrayRef(TFOps, i));  } -// Adjust parameters for memset, EABI uses format (ptr, size, value), -// GNU library uses (ptr, value, size) -// See RTABI section 4.3.4 + +SDValue ARMSelectionDAGInfo:: +EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, +                         SDValue Chain, +                         SDValue Dst, SDValue Src, +                         SDValue Size, unsigned Align, +                         bool isVolatile, +                         MachinePointerInfo DstPtrInfo, +                         MachinePointerInfo SrcPtrInfo) const { +  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, +                                RTLIB::MEMMOVE); +} + +  SDValue ARMSelectionDAGInfo::  EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,                          SDValue Chain, SDValue Dst,                          SDValue Src, SDValue Size,                          unsigned Align, bool isVolatile,                          MachinePointerInfo DstPtrInfo) const { -  const ARMSubtarget &Subtarget = -      DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); -  // Use default for non-AAPCS (or MachO) subtargets -  if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() || -      Subtarget.isTargetWindows()) -    return SDValue(); - -  const ARMTargetLowering &TLI = *Subtarget.getTargetLowering(); -  TargetLowering::ArgListTy Args; -  TargetLowering::ArgListEntry Entry; - -  // First argument: data pointer -  Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext()); -  Entry.Node = Dst; -  Entry.Ty = IntPtrTy; -  Args.push_back(Entry); - -  // Second argument: buffer size -  Entry.Node = Size; -  Entry.Ty = IntPtrTy; -  Entry.isSExt = false; -  Args.push_back(Entry); - -  // Extend or truncate the argument to be an i32 value for the call. -  if (Src.getValueType().bitsGT(MVT::i32)) -    Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); -  else -    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); - -  // Third argument: value to fill -  Entry.Node = Src; -  Entry.Ty = Type::getInt32Ty(*DAG.getContext()); -  Entry.isSExt = true; -  Args.push_back(Entry); - -  // Emit __eabi_memset call -  TargetLowering::CallLoweringInfo CLI(DAG); -  CLI.setDebugLoc(dl).setChain(Chain) -    .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET), -               Type::getVoidTy(*DAG.getContext()), -               DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), -                                     TLI.getPointerTy()), std::move(Args), 0) -    .setDiscardResult(); - -  std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); -  return CallResult.second; +  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, +                                RTLIB::MEMSET);  } diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index 94b98e66847..1db190f41e1 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -48,6 +48,13 @@ public:                                    MachinePointerInfo DstPtrInfo,                                    MachinePointerInfo SrcPtrInfo) const override; +  SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, +                                   SDValue Chain, +                                   SDValue Dst, SDValue Src, +                                   SDValue Size, unsigned Align, bool isVolatile, +                                   MachinePointerInfo DstPtrInfo, +                                   MachinePointerInfo SrcPtrInfo) const override; +    // Adjust parameters for memset, see RTABI section 4.3.4    SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,                                    SDValue Chain, @@ -55,6 +62,12 @@ public:                                    SDValue Op3, unsigned Align,                                    bool isVolatile,                                    MachinePointerInfo DstPtrInfo) const override; + +  SDValue EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, +                                 SDValue Chain, +                                 SDValue Dst, SDValue Src, +                                 SDValue Size, unsigned Align, +                                 RTLIB::Libcall LC) const;  };  } diff --git a/llvm/test/CodeGen/ARM/memfunc.ll b/llvm/test/CodeGen/ARM/memfunc.ll index c2143361ffb..5223983a7f3 100644 --- a/llvm/test/CodeGen/ARM/memfunc.ll +++ b/llvm/test/CodeGen/ARM/memfunc.ll @@ -18,13 +18,64 @@ entry:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)    ; EABI memset swaps arguments +  ; CHECK-IOS: mov r1, #1 +  ; CHECK-IOS: memset +  ; CHECK-DARWIN: movs r1, #1 +  ; CHECK-DARWIN: memset +  ; CHECK-EABI: mov r2, #1 +  ; CHECK-EABI: __aeabi_memset +  call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false) + +  ; EABI uses memclr if value set to 0    ; CHECK-IOS: mov r1, #0    ; CHECK-IOS: memset    ; CHECK-DARWIN: movs r1, #0    ; CHECK-DARWIN: memset -  ; CHECK-EABI: mov r2, #0 -  ; CHECK-EABI: __aeabi_memset +  ; CHECK-EABI: __aeabi_memclr    call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false) +   +  ; EABI uses aligned function variants if possible + +  ; CHECK-IOS: memmove +  ; CHECK-DARWIN: memmove +  ; CHECK-EABI: __aeabi_memmove4 +  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false) + +  ; CHECK-IOS: memcpy +  ; CHECK-DARWIN: memcpy +  ; CHECK-EABI: __aeabi_memcpy4 +  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false) + +  ; CHECK-IOS: memset +  ; CHECK-DARWIN: memset +  ; CHECK-EABI: __aeabi_memset4 +  call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 4, i1 false) + +  ; CHECK-IOS: memset +  ; CHECK-DARWIN: memset +  ; CHECK-EABI: __aeabi_memclr4 +  call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 4, i1 false) + +  ; CHECK-IOS: memmove +  ; CHECK-DARWIN: memmove +  ; CHECK-EABI: __aeabi_memmove8 +  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false) + +  ; CHECK-IOS: memcpy +  ; CHECK-DARWIN: memcpy +  ; CHECK-EABI: __aeabi_memcpy8 +  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false) + +  ; CHECK-IOS: memset +  ; CHECK-DARWIN: memset +  ; CHECK-EABI: __aeabi_memset8 +  call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 8, i1 false) + +  ; CHECK-IOS: memset +  ; CHECK-DARWIN: memset +  ; CHECK-EABI: __aeabi_memclr8 +  call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 8, i1 false) +    unreachable  } @@ -53,17 +104,17 @@ entry:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)    ; CHECK-IOS: mov r0, sp -  ; CHECK-IOS: mov r1, #0 +  ; CHECK-IOS: mov r1, #1    ; CHECK-IOS: memset -  ; CHECK-DARINW: add r0, sp, #4 -  ; CHECK-DARWIN: movs r1, #0 +  ; CHECK-DARWIN: add r0, sp, #4 +  ; CHECK-DARWIN: movs r1, #1    ; CHECK-DARWIN: memset    ; CHECK-EABI: add r0, sp, #4 -  ; CHECK-EABI: mov r2, #0 +  ; CHECK-EABI: mov r2, #1    ; CHECK-EABI: __aeabi_memset    %arr2 = alloca [9 x i8], align 1    %2 = bitcast [9 x i8]* %arr2 to i8* -  call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false) +  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)    unreachable  } @@ -90,15 +141,15 @@ entry:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)    ; CHECK: {{add(.w)? r0, sp, #3}} -  ; CHECK-IOS: mov r1, #0 +  ; CHECK-IOS: mov r1, #1    ; CHECK-IOS: memset -  ; CHECK-DARWIN: movs r1, #0 +  ; CHECK-DARWIN: movs r1, #1    ; CHECK-DARWIN: memset -  ; CHECK-EABI: mov r2, #0 +  ; CHECK-EABI: mov r2, #1    ; CHECK-EABI: __aeabi_memset    %arr2 = alloca [7 x i8], align 1    %2 = bitcast [7 x i8]* %arr2 to i8* -  call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false) +  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)    unreachable  } @@ -125,15 +176,15 @@ entry:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)    ; CHECK: {{add(.w)? r., sp, #(1|5)}} -  ; CHECK-IOS: mov r1, #0 +  ; CHECK-IOS: mov r1, #1    ; CHECK-IOS: memset -  ; CHECK-DARWIN: movs r1, #0 +  ; CHECK-DARWIN: movs r1, #1    ; CHECK-DARWIN: memset -  ; CHECK-EABI: mov r2, #0 +  ; CHECK-EABI: mov r2, #1    ; CHECK-EABI: __aeabi_memset    %arr2 = alloca [9 x i8], align 1    %2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4 -  call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false) +  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)    unreachable  } @@ -160,15 +211,15 @@ entry:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)    ; CHECK: {{add(.w)? r., sp, #(1|5)}} -  ; CHECK-IOS: mov r1, #0 +  ; CHECK-IOS: mov r1, #1    ; CHECK-IOS: memset -  ; CHECK-DARWIN: movs r1, #0 +  ; CHECK-DARWIN: movs r1, #1    ; CHECK-DARWIN: memset -  ; CHECK-EABI: mov r2, #0 +  ; CHECK-EABI: mov r2, #1    ; CHECK-EABI: __aeabi_memset    %arr2 = alloca [13 x i8], align 1    %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1 -  call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false) +  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)    unreachable  } @@ -195,15 +246,15 @@ entry:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)    ; CHECK: {{add(.w)? r., sp, #(1|5)}} -  ; CHECK-IOS: mov r1, #0 +  ; CHECK-IOS: mov r1, #1    ; CHECK-IOS: memset -  ; CHECK-DARWIN: movs r1, #0 +  ; CHECK-DARWIN: movs r1, #1    ; CHECK-DARWIN: memset -  ; CHECK-EABI: mov r2, #0 +  ; CHECK-EABI: mov r2, #1    ; CHECK-EABI: __aeabi_memset    %arr2 = alloca [13 x i8], align 1    %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i -  call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false) +  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)    unreachable  } @@ -230,15 +281,15 @@ entry:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)    ; CHECK: {{add(.w)? r., sp, #(1|5)}} -  ; CHECK-IOS: mov r1, #0 +  ; CHECK-IOS: mov r1, #1    ; CHECK-IOS: memset -  ; CHECK-DARWIN: movs r1, #0 +  ; CHECK-DARWIN: movs r1, #1    ; CHECK-DARWIN: memset -  ; CHECK-EABI: mov r2, #0 +  ; CHECK-EABI: mov r2, #1    ; CHECK-EABI: __aeabi_memset    %arr2 = alloca [13 x i8], align 1    %2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4 -  call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false) +  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)    unreachable  } @@ -265,15 +316,15 @@ entry:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)    ; CHECK: {{add(.w)? r., sp, #(1|5)}} -  ; CHECK-IOS: mov r1, #0 +  ; CHECK-IOS: mov r1, #1    ; CHECK-IOS: memset -  ; CHECK-DARWIN: movs r1, #0 +  ; CHECK-DARWIN: movs r1, #1    ; CHECK-DARWIN: memset -  ; CHECK-EABI: mov r2, #0 +  ; CHECK-EABI: mov r2, #1    ; CHECK-EABI: __aeabi_memset    %arr2 = alloca [13 x i8], align 1    %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16 -  call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false) +  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)    unreachable  }  | 

