diff options
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 47 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 2 | ||||
| -rw-r--r-- | clang/test/CodeGen/aarch64-neon-shifts.c | 43 | ||||
| -rw-r--r-- | clang/test/CodeGen/arm-neon-shifts.c | 45 |
4 files changed, 125 insertions, 12 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b4caab22979..2fe4167a888 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1657,6 +1657,39 @@ Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, return llvm::ConstantVector::getSplat(VTy->getNumElements(), C); } +// \brief Right-shift a vector by a constant. +Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, + llvm::Type *Ty, bool usgn, + const char *name) { + llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); + + int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); + int EltSize = VTy->getScalarSizeInBits(); + + Vec = Builder.CreateBitCast(Vec, Ty); + + // lshr/ashr are undefined when the shift amount is equal to the vector + // element size. + if (ShiftAmt == EltSize) { + if (usgn) { + // Right-shifting an unsigned value by its size yields 0. + llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0); + return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero); + } else { + // Right-shifting a signed value by its size is equivalent + // to a shift of size-1. + --ShiftAmt; + Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); + } + } + + Shift = EmitNeonShiftVector(Shift, Ty, false); + if (usgn) + return Builder.CreateLShr(Vec, Shift, name); + else + return Builder.CreateAShr(Vec, Shift, name); +} + /// GetPointeeAlignment - Given an expression with a pointer type, find the /// alignment of the type referenced by the pointer. Skip over implicit /// casts. @@ -3125,12 +3158,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ops, "vshrn_n", 1, true); case ARM::BI__builtin_neon_vshr_n_v: case ARM::BI__builtin_neon_vshrq_n_v: - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); - if (usgn) - return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n"); - else - return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n"); + return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, usgn, "vshr_n"); case ARM::BI__builtin_neon_vsri_n_v: case ARM::BI__builtin_neon_vsriq_n_v: rightShift = true; @@ -3142,12 +3170,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case ARM::BI__builtin_neon_vsra_n_v: case ARM::BI__builtin_neon_vsraq_n_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false); - if (usgn) - Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n"); - else - Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n"); + Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); return Builder.CreateAdd(Ops[0], Ops[1]); case ARM::BI__builtin_neon_vst1_v: case ARM::BI__builtin_neon_vst1q_v: diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8c506ac8609..19c64b71a46 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2155,6 +2155,8 @@ public: llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx); llvm::Value *EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift); + llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, + llvm::Type *Ty, bool usgn, const char *name); llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); diff --git a/clang/test/CodeGen/aarch64-neon-shifts.c b/clang/test/CodeGen/aarch64-neon-shifts.c new file mode 100644 index 00000000000..9b939e6f32b --- /dev/null +++ b/clang/test/CodeGen/aarch64-neon-shifts.c @@ -0,0 +1,43 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -emit-llvm -O1 -o - %s | FileCheck %s + +#include <arm_neon.h> + +uint8x8_t test_shift_vshr(uint8x8_t a) { + // CHECK-LABEL: test_shift_vshr + // CHECK: %vshr_n = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> + return vshr_n_u8(a, 5); +} + +int8x8_t test_shift_vshr_smax(int8x8_t a) { + // CHECK-LABEL: test_shift_vshr_smax + // CHECK: %vshr_n = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + return vshr_n_s8(a, 8); +} + +uint8x8_t test_shift_vshr_umax(uint8x8_t a) { + // CHECK-LABEL: test_shift_vshr_umax + // CHECK: ret <8 x i8> zeroinitializer + return vshr_n_u8(a, 8); +} + +uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_shift_vsra + // CHECK: %vsra_n = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> + // CHECK: %0 = add <8 x i8> %vsra_n, %a + return vsra_n_u8(a, b, 5); +} + +int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_shift_vsra_smax + // CHECK: %vsra_n = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + // CHECK: %0 = add <8 x i8> %vsra_n, %a + return vsra_n_s8(a, b, 8); +} + +uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_shift_vsra_umax + // CHECK: ret <8 x i8> %a + return vsra_n_u8(a, b, 8); +} diff --git a/clang/test/CodeGen/arm-neon-shifts.c b/clang/test/CodeGen/arm-neon-shifts.c new file mode 100644 index 00000000000..a89ddf5e5c8 --- /dev/null +++ b/clang/test/CodeGen/arm-neon-shifts.c @@ -0,0 +1,45 @@ +// REQUIRES: arm-registered-target +// RUN: %clang_cc1 -triple thumbv7-apple-darwin \ +// RUN: -target-cpu cortex-a8 \ +// RUN: -ffreestanding \ +// RUN: -emit-llvm -w -O1 -o - %s | FileCheck %s + +#include <arm_neon.h> + +uint8x8_t test_shift_vshr(uint8x8_t a) { + // CHECK-LABEL: test_shift_vshr + // CHECK: %vshr_n = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> + return vshr_n_u8(a, 5); +} + +int8x8_t test_shift_vshr_smax(int8x8_t a) { + // CHECK-LABEL: test_shift_vshr_smax + // CHECK: %vshr_n = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + return vshr_n_s8(a, 8); +} + +uint8x8_t test_shift_vshr_umax(uint8x8_t a) { + // CHECK-LABEL: test_shift_vshr_umax + // CHECK: ret <8 x i8> zeroinitializer + return vshr_n_u8(a, 8); +} + +uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_shift_vsra + // CHECK: %vsra_n = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> + // CHECK: %0 = add <8 x i8> %vsra_n, %a + return vsra_n_u8(a, b, 5); +} + +int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_shift_vsra_smax + // CHECK: %vsra_n = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + // CHECK: %0 = add <8 x i8> %vsra_n, %a + return vsra_n_s8(a, b, 8); +} + +uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_shift_vsra_umax + // CHECK: ret <8 x i8> %a + return vsra_n_u8(a, b, 8); +} |

