diff options
author | Tim Shen <timshen91@gmail.com> | 2017-01-20 22:05:33 +0000 |
---|---|---|
committer | Tim Shen <timshen91@gmail.com> | 2017-01-20 22:05:33 +0000 |
commit | 867be0d14ce393108d848bcd9d080a92ca6b0006 (patch) | |
tree | 6ca351b65868feb97cbb4fa202597ba6486274af | |
parent | d88f928a5c4c4a40ee6bdf785767e43ec6167aa8 (diff) | |
download | bcm5719-llvm-867be0d14ce393108d848bcd9d080a92ca6b0006.tar.gz bcm5719-llvm-867be0d14ce393108d848bcd9d080a92ca6b0006.zip |
[Altivec] Change vec_sl to a << (b % (sizeof(a) * 8))
For a << b (as original vec_sl does), if b >= sizeof(a) * 8, the
behavior is undefined. However, Power instructions do define the
behavior, which is equivalent to a << (b % (sizeof(a) * 8)).
This patch changes altivec.h to use a << (b % (sizeof(a) * 8)), to
ensure the consistent semantic of the instructions. Then it combines
the generated multiple instructions back to a single shift.
This patch handles left shift only. Right shift, on the other hand, is
more complicated, considering arithematic/logical right shift.
Differential Revision: https://reviews.llvm.org/D28037
llvm-svn: 292659
-rw-r--r-- | clang/lib/Headers/altivec.h | 44 | ||||
-rw-r--r-- | clang/test/CodeGen/builtins-ppc-altivec.c | 36 |
2 files changed, 49 insertions, 31 deletions
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index a01d9d837ad..421e2a7754a 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -8045,45 +8045,51 @@ static __inline__ vector float __ATTRS_o_ai vec_vsel(vector float __a, /* vec_sl */ -static __inline__ vector signed char __ATTRS_o_ai -vec_sl(vector signed char __a, vector unsigned char __b) { - return __a << (vector signed char)__b; -} - +// vec_sl does modulo arithmetic on __b first, so __b is allowed to be more +// than the length of __a. static __inline__ vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a, vector unsigned char __b) { - return __a << __b; + return __a << (__b % + (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__)); } -static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a, - vector unsigned short __b) { - return __a << (vector short)__b; +static __inline__ vector signed char __ATTRS_o_ai +vec_sl(vector signed char __a, vector unsigned char __b) { + return (vector signed char)vec_sl((vector unsigned char)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sl(vector unsigned short __a, vector unsigned short __b) { - return __a << __b; + return __a << (__b % (vector unsigned short)(sizeof(unsigned short) * + __CHAR_BIT__)); } -static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a, - vector unsigned int __b) { - return __a << (vector int)__b; +static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a, + vector unsigned short __b) { + return (vector short)vec_sl((vector unsigned short)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sl(vector unsigned int __a, vector unsigned int __b) { - return __a << __b; + return __a << (__b % + (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__)); } -#ifdef __POWER8_VECTOR__ -static __inline__ vector signed long long __ATTRS_o_ai -vec_sl(vector signed long long __a, vector unsigned long long __b) { - return __a << (vector long long)__b; +static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a, + vector unsigned int __b) { + return (vector int)vec_sl((vector unsigned int)__a, __b); } +#ifdef __POWER8_VECTOR__ static __inline__ vector unsigned long long __ATTRS_o_ai vec_sl(vector unsigned long long __a, vector unsigned long long __b) { - return __a << __b; + return __a << (__b % (vector unsigned long long)(sizeof(unsigned long long) * + __CHAR_BIT__)); +} + +static __inline__ vector long long __ATTRS_o_ai +vec_sl(vector long long __a, vector unsigned long long __b) { + return (vector long long)vec_sl((vector unsigned long long)__a, __b); } #endif diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c index 3b75cb49c3f..d54a69802ff 100644 --- a/clang/test/CodeGen/builtins-ppc-altivec.c +++ b/clang/test/CodeGen/builtins-ppc-altivec.c @@ -3419,28 +3419,40 @@ void test6() { /* vec_sl */ res_vsc = vec_sl(vsc, vuc); -// CHECK: shl <16 x i8> -// CHECK-LE: shl <16 x i8> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> +// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> +// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_sl(vuc, vuc); -// CHECK: shl <16 x i8> -// CHECK-LE: shl <16 x i8> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> +// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> +// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_sl(vs, vus); -// CHECK: shl <8 x i16> -// CHECK-LE: shl <8 x i16> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> +// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> +// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_sl(vus, vus); -// CHECK: shl <8 x i16> -// CHECK-LE: shl <8 x i16> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> +// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> +// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_sl(vi, vui); -// CHECK: shl <4 x i32> -// CHECK-LE: shl <4 x i32> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32> +// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32> +// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_sl(vui, vui); -// CHECK: shl <4 x i32> -// CHECK-LE: shl <4 x i32> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32> +// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32> +// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vsc = vec_vslb(vsc, vuc); // CHECK: shl <16 x i8> |