[OpenCL] Use long instead of long long in x86 builtins

Summary: According to C99 standard long long is at least 64 bits in size. However, OpenCL C defines long long as 128 bit signed integer. This prevents one to use x86 builtins when compiling OpenCL C code for x86 targets. The patch changes long long to long for OpenCL only. Patch by: Alexander Batashev <alexander.batashev@intel.com> Reviewers: craig.topper, Ka-Ka, eandrews, erichkeane, Anastasia Reviewed By: Ka-Ka, erichkeane, Anastasia Subscribers: a.elovikov, yaxunl, Anastasia, cfe-commits, ivankara, etyurin, asavonic Tags: #clang Differential Revision: https://reviews.llvm.org/D62580 llvm-svn: 362391
author: Andrew Savonichev <andrew.savonichev@intel.com> 2019-06-03 12:34:59 +0000
committer: Andrew Savonichev <andrew.savonichev@intel.com> 2019-06-03 12:34:59 +0000
commit: fa8cd7691ac28d07f6a127ed26f0dbe49699bd59 (patch)
tree: bbb3d165225aa4a425a9b6b6342c13840d552bfc /clang/test/CodeGen/builtins-x86.c
parent: edfa756f3f5b9e31c69a34542d1b09c947f5dd2b (diff)
download: bcm5719-llvm-fa8cd7691ac28d07f6a127ed26f0dbe49699bd59.tar.gz
bcm5719-llvm-fa8cd7691ac28d07f6a127ed26f0dbe49699bd59.zip
1 files changed, 47 insertions, 11 deletions
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index e237bc2d12b..8236922e3d3 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -target-feature +shstk -target-feature +wbnoinvd -target-feature +cldemote -emit-llvm -o %t %s
 // RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +shstk -target-feature +clzero -target-feature +wbnoinvd -target-feature +cldemote -fsyntax-only -o %t %s
+// RUN: %clang_cc1 -DUSE_64 -DOPENCL -x cl -cl-std=CL2.0 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -target-feature +shstk -target-feature +wbnoinvd -target-feature +cldemote -emit-llvm -o %t %s
 
 #ifdef USE_ALL
 #define USE_3DNOW
@@ -11,7 +12,11 @@
 typedef char V8c __attribute__((vector_size(8 * sizeof(char))));
 typedef signed short V4s __attribute__((vector_size(8)));
 typedef signed int V2i __attribute__((vector_size(8)));
+#ifndef OPENCL
 typedef signed long long V1LLi __attribute__((vector_size(8)));
+#else
+typedef signed long V1LLi __attribute__((vector_size(8)));
+#endif
 
 typedef float V2f __attribute__((vector_size(8)));
 
@@ -19,7 +24,11 @@ typedef float V2f __attribute__((vector_size(8)));
 typedef char V16c __attribute__((vector_size(16)));
 typedef signed short V8s __attribute__((vector_size(16)));
 typedef signed int V4i __attribute__((vector_size(16)));
+#ifndef OPENCL
 typedef signed long long V2LLi __attribute__((vector_size(16)));
+#else
+typedef signed long V2LLi __attribute__((vector_size(16)));
+#endif
 
 typedef float V4f __attribute__((vector_size(16)));
 typedef double V2d __attribute__((vector_size(16)));
@@ -27,7 +36,11 @@ typedef double V2d __attribute__((vector_size(16)));
 // 256-bit
 typedef char V32c __attribute__((vector_size(32)));
 typedef signed int V8i __attribute__((vector_size(32)));
+#ifndef OPENCL
 typedef signed long long V4LLi __attribute__((vector_size(32)));
+#else
+typedef signed long V4LLi __attribute__((vector_size(32)));
+#endif
 
 typedef double V4d __attribute__((vector_size(32)));
 typedef float  V8f __attribute__((vector_size(32)));
@@ -41,21 +54,30 @@ void f0() {
 #endif
   signed int          tmp_i;
   unsigned int        tmp_Ui;
+#ifndef OPENCL
   signed long long    tmp_LLi;
   unsigned long long  tmp_ULLi;
+#else
+  signed long         tmp_LLi;
+  unsigned long       tmp_ULLi;
+#endif
   float               tmp_f;
   double              tmp_d;
 
   void*          tmp_vp;
   const void*    tmp_vCp;
-  char*          tmp_cp; 
-  const char*    tmp_cCp; 
+  char*          tmp_cp;
+  const char*    tmp_cCp;
   int*           tmp_ip;
   float*         tmp_fp;
   const float*   tmp_fCp;
   double*        tmp_dp;
   const double*  tmp_dCp;
+#ifndef OPENCL
   long long*     tmp_LLip;
+#else
+  long*          tmp_LLip;
+#endif
 
 #define imm_i 32
 #define imm_i_0_2 0
@@ -102,8 +124,8 @@ void f0() {
   const V4d* tmp_V4dCp;
   const V8f* tmp_V8fCp;
 
-  tmp_V2LLi = __builtin_ia32_undef128();
-  tmp_V4LLi = __builtin_ia32_undef256();
+  tmp_V2d = __builtin_ia32_undef128();
+  tmp_V4d = __builtin_ia32_undef256();
 
   tmp_i = __builtin_ia32_comieq(tmp_V4f, tmp_V4f);
   tmp_i = __builtin_ia32_comilt(tmp_V4f, tmp_V4f);
@@ -203,9 +225,9 @@ void f0() {
   tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s);
   tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c);
   tmp_V8s = __builtin_ia32_pminsw128(tmp_V8s, tmp_V8s);
-  tmp_V8s = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s);
-  tmp_V4i = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i);
-  tmp_V8s = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s);
+  tmp_V16c = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s);
+  tmp_V8s = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i);
+  tmp_V16c = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s);
   tmp_V8s = __builtin_ia32_pmulhuw128(tmp_V8s, tmp_V8s);
   tmp_V4f = __builtin_ia32_addsubps(tmp_V4f, tmp_V4f);
   tmp_V2d = __builtin_ia32_addsubpd(tmp_V2d, tmp_V2d);
@@ -225,7 +247,7 @@ void f0() {
   tmp_V2i = __builtin_ia32_phsubd(tmp_V2i, tmp_V2i);
   tmp_V8s = __builtin_ia32_phsubsw128(tmp_V8s, tmp_V8s);
   tmp_V4s = __builtin_ia32_phsubsw(tmp_V4s, tmp_V4s);
-  tmp_V16c = __builtin_ia32_pmaddubsw128(tmp_V16c, tmp_V16c);
+  tmp_V8s = __builtin_ia32_pmaddubsw128(tmp_V16c, tmp_V16c);
   tmp_V8c = __builtin_ia32_pmaddubsw(tmp_V8c, tmp_V8c);
   tmp_V8s = __builtin_ia32_pmulhrsw128(tmp_V8s, tmp_V8s);
   tmp_V4s = __builtin_ia32_pmulhrsw(tmp_V4s, tmp_V4s);
@@ -271,9 +293,13 @@ void f0() {
   __builtin_ia32_clrssbsy(tmp_vp);
 
   (void) __builtin_ia32_ldmxcsr(tmp_Ui);
+#ifndef OPENCL
   (void) _mm_setcsr(tmp_Ui);
+#endif
   tmp_Ui = __builtin_ia32_stmxcsr();
+#ifndef OPENCL
   tmp_Ui = _mm_getcsr();
+#endif
   (void)__builtin_ia32_fxsave(tmp_vp);
   (void)__builtin_ia32_fxsave64(tmp_vp);
   (void)__builtin_ia32_fxrstor(tmp_vp);
@@ -321,7 +347,9 @@ void f0() {
   tmp_i = __builtin_ia32_pmovmskb(tmp_V8c);
   (void) __builtin_ia32_movntq(tmp_V1LLip, tmp_V1LLi);
   (void) __builtin_ia32_sfence();
+#ifndef OPENCL
   (void) _mm_sfence();
+#endif
 
   tmp_V4s = __builtin_ia32_psadbw(tmp_V8c, tmp_V8c);
   tmp_V4f = __builtin_ia32_rcpps(tmp_V4f);
@@ -356,13 +384,21 @@ void f0() {
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
+#ifndef OPENCL
   (void) _mm_clflush(tmp_vCp);
+#endif
   (void) __builtin_ia32_lfence();
+#ifndef OPENCL
   (void) _mm_lfence();
+#endif
   (void) __builtin_ia32_mfence();
+#ifndef OPENCL
   (void) _mm_mfence();
+#endif
   (void) __builtin_ia32_pause();
+#ifndef OPENCL
   (void) _mm_pause();
+#endif
   tmp_V4s = __builtin_ia32_psllwi(tmp_V4s, tmp_i);
   tmp_V2i = __builtin_ia32_pslldi(tmp_V2i, tmp_i);
   tmp_V1LLi = __builtin_ia32_psllqi(tmp_V1LLi, tmp_i);
@@ -389,12 +425,12 @@ void f0() {
   tmp_V2LLi = __builtin_ia32_psrlqi128(tmp_V2LLi, tmp_i);
   tmp_V8s = __builtin_ia32_psrawi128(tmp_V8s, tmp_i);
   tmp_V4i = __builtin_ia32_psradi128(tmp_V4i, tmp_i);
-  tmp_V8s = __builtin_ia32_pmaddwd128(tmp_V8s, tmp_V8s);
+  tmp_V4i = __builtin_ia32_pmaddwd128(tmp_V8s, tmp_V8s);
   (void) __builtin_ia32_monitor(tmp_vp, tmp_Ui, tmp_Ui);
   (void) __builtin_ia32_mwait(tmp_Ui, tmp_Ui);
   tmp_V16c = __builtin_ia32_lddqu(tmp_cCp);
-  tmp_V2LLi = __builtin_ia32_palignr128(tmp_V2LLi, tmp_V2LLi, imm_i);
-  tmp_V1LLi = __builtin_ia32_palignr(tmp_V1LLi, tmp_V1LLi, imm_i);
+  tmp_V16c = __builtin_ia32_palignr128(tmp_V16c, tmp_V16c, imm_i);
+  tmp_V8c = __builtin_ia32_palignr(tmp_V8c, tmp_V8c, imm_i);
 #ifdef USE_SSE4
   tmp_V16c = __builtin_ia32_pblendvb128(tmp_V16c, tmp_V16c, tmp_V16c);
   tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d);
author	Andrew Savonichev <andrew.savonichev@intel.com>	2019-06-03 12:34:59 +0000
committer	Andrew Savonichev <andrew.savonichev@intel.com>	2019-06-03 12:34:59 +0000
commit	fa8cd7691ac28d07f6a127ed26f0dbe49699bd59 (patch)
tree	bbb3d165225aa4a425a9b6b6342c13840d552bfc /clang/test/CodeGen/builtins-x86.c
parent	edfa756f3f5b9e31c69a34542d1b09c947f5dd2b (diff)
download	bcm5719-llvm-fa8cd7691ac28d07f6a127ed26f0dbe49699bd59.tar.gz bcm5719-llvm-fa8cd7691ac28d07f6a127ed26f0dbe49699bd59.zip