[AArch64] Fixed vget/vset_lane_f16 implementation

Replaced cast and vreinterpret operations with code that bitwise-reinterprets between the float16_t and int16_t types. llvm-svn: 201112
author: Ana Pazos <apazos@codeaurora.org> 2014-02-10 21:20:53 +0000
committer: Ana Pazos <apazos@codeaurora.org> 2014-02-10 21:20:53 +0000
commit: 9883d6d2b584f60bb0a91a460c349e85356500e5 (patch)
tree: 969d1985d96a31a209fd0c249465914e54280ffa
parent: 47e84fb675214b2b985e54b5ad72c0a27f2bf431 (diff)
download: bcm5719-llvm-9883d6d2b584f60bb0a91a460c349e85356500e5.tar.gz
bcm5719-llvm-9883d6d2b584f60bb0a91a460c349e85356500e5.zip
2 files changed, 99 insertions, 39 deletions
diff --git a/clang/test/CodeGen/aarch64-neon-copy.c b/clang/test/CodeGen/aarch64-neon-copy.c
index ebdf4ce85fe..f0de598906c 100644
--- a/clang/test/CodeGen/aarch64-neon-copy.c
+++ b/clang/test/CodeGen/aarch64-neon-copy.c
@@ -1244,79 +1244,128 @@ float64x2_t test_vcopyq_laneq_f64(float64x2_t a, float64x2_t c) {
   return vcopyq_laneq_f64(a, 1, c, 1);
 }
 
-// CHECK: test_vget_lane_f16
+// CHECK-LABEL: test_vget_lane_f16
 int test_vget_lane_f16(float16x4_t v1) {
   float16_t a = vget_lane_f16(v1, 3);
   return (int)a;
 // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
 }
 
-// CHECK: test_vgetq_lane_f16
+// CHECK-LABEL: test_vgetq_lane_f16
 int test_vgetq_lane_f16(float16x8_t v1) {
   float16_t a = vgetq_lane_f16(v1, 7);
   return (int)a;
 // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
 }
 
-// CHECK: test_vget_lane_f16_2
-float test_vget_lane_f16_2(float16x4_t v1) {
+// CHECK-LABEL: test2_vget_lane_f16
+float test2_vget_lane_f16(float16x4_t v1) {
   float16_t a = vget_lane_f16(v1, 3);
   return (float)a;
 // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
 }
 
-// CHECK: test_vgetq_lane_f16_2
-float test_vgetq_lane_f16_2(float16x8_t v1) {
+// CHECK-LABEL: test2_vgetq_lane_f16
+float test2_vgetq_lane_f16(float16x8_t v1) {
   float16_t a = vgetq_lane_f16(v1, 7);
   return (float)a;
 // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
 }
 
-// CHECK: test_vset_lane_f16
+// CHECK-LABEL: test_vset_lane_f16
 float16x4_t test_vset_lane_f16(float16x4_t v1) {
-  float16_t a;
+  float16_t a = 0.0;
   return vset_lane_f16(a, v1, 3);
-// CHECK: fmov  {{s[0-9]+}}, wzr
-// CHECK-NEXT: ins {{v[0-9]+}}.h[3],  {{v[0-9]+}}.h[0]
+// CHECK: ins {{v[0-9]+}}.h[3], wzr
 }
 
-// CHECK: test_vsetq_lane_f16
+// CHECK-LABEL: test_vsetq_lane_f16
 float16x8_t test_vsetq_lane_f16(float16x8_t v1) {
-  float16_t a;
+  float16_t a = 0.0;
   return vsetq_lane_f16(a, v1, 7);
-// CHECK: fmov  {{s[0-9]+}}, wzr
-// CHECK-NEXT: ins {{v[0-9]+}}.h[7],  {{v[0-9]+}}.h[0]
+// CHECK: ins {{v[0-9]+}}.h[7], wzr
 }
 
-// CHECK: test_vset_lane_f16_2
-float16x4_t test_vset_lane_f16_2(float16x4_t v1) {
+// CHECK-LABEL: test2_vset_lane_f16
+float16x4_t test2_vset_lane_f16(float16x4_t v1) {
+  float16_t a = 1.0;
+  return vset_lane_f16(a, v1, 3);
+// CHECK:  movz    {{w[0-9]+}}, #15360
+// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test2_vsetq_lane_f16
+float16x8_t test2_vsetq_lane_f16(float16x8_t v1) {
+  float16_t a = 1.0;
+  return vsetq_lane_f16(a, v1, 7);
+// CHECK:  movz    {{w[0-9]+}}, #15360
+// CHECK-NEXT: ins {{v[0-9]+}}.h[7],  {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test_vget_vset_lane_f16
+float16x4_t test_vget_vset_lane_f16(float16x4_t v1) {
   float16_t a = vget_lane_f16(v1, 0);
   return vset_lane_f16(a, v1, 3);
 // CHECK: ins {{v[0-9]+}}.h[3],  {{v[0-9]+}}.h[0]
 }
 
-// CHECK: test_vsetq_lane_f16_2
-float16x8_t test_vsetq_lane_f16_2(float16x8_t v1) {
+// CHECK-LABEL: test_vgetq_vsetq_lane_f16
+float16x8_t test_vgetq_vsetq_lane_f16(float16x8_t v1) {
   float16_t a = vgetq_lane_f16(v1, 0);
   return vsetq_lane_f16(a, v1, 7);
 // CHECK: ins {{v[0-9]+}}.h[7],  {{v[0-9]+}}.h[0]
 }
 
+// CHECK-LABEL: test4_vset_lane_f16
+float16x4_t test4_vset_lane_f16(float16x4_t v1, float b, float c) {
+  float16_t a = (float16_t)b;
+  return vset_lane_f16(a, v1, 3);
+// CHECK: fmov {{w[0-9]+}},  {{s[0-9]+}}
+// CHECK: ins {{v[0-9]+}}.h[3],  {{w[0-9]+}}
+}
 
-// CHECK: test_vsetq_lane_f16_3
-float16x8_t test_vsetq_lane_f16_3(float16x8_t v1, float b, float c) {
+// CHECK-LABEL: test4_vsetq_lane_f16
+float16x8_t test4_vsetq_lane_f16(float16x8_t v1, float b, float c) {
   float16_t a = (float16_t)b;
   return vsetq_lane_f16(a, v1, 7);
+// CHECK: fmov {{w[0-9]+}},  {{s[0-9]+}}
 // CHECK: ins {{v[0-9]+}}.h[7],  {{w[0-9]+}}
 }
 
-// CHECK: test_vsetq_lane_f16_4
-float16x8_t test_vsetq_lane_f16_4(float16x8_t v1, float b, float c) {
+// CHECK-LABEL: test5_vset_lane_f16
+float16x4_t test5_vset_lane_f16(float16x4_t v1, float b, float c) {
+  float16_t a = (float16_t)b;
+  return vset_lane_f16(a, v1, 3);
+// CHECK: fmov {{w[0-9]+}},  {{s[0-9]+}}
+// CHECK: ins {{v[0-9]+}}.h[3],  {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test5_vsetq_lane_f16
+float16x8_t test5_vsetq_lane_f16(float16x8_t v1, float b, float c) {
   float16_t a = (float16_t)b + 1.0;
   return vsetq_lane_f16(a, v1, 7);
+// CHECK: fmov {{w[0-9]+}},  {{s[0-9]+}}
 // CHECK: ins {{v[0-9]+}}.h[7],  {{w[0-9]+}}
 }
 
+// CHECK-LABEL: test_vset_vget_lane_f16
+int test_vset_vget_lane_f16(float16x4_t a) {
+  float16x4_t b;
+  b = vset_lane_f16(3.5, a, 3);
+  float16_t c = vget_lane_f16(b, 3);
+  return (int)c;
+// CHECK: movz x{{[0-9]+}}, #3
+}
+
+// CHECK-LABEL: test_vsetq_vgetq_lane_f16
+int test_vsetq_vgetq_lane_f16(float16x8_t a) {
+  float16x8_t b;
+  b = vsetq_lane_f16(3.5, a, 5);
+  float16_t c = vgetq_lane_f16(b, 5);
+  return (int)c;
+// CHECK: movz x{{[0-9]+}}, #3
+}
+
 // CHECK-LABEL: test_vdup_laneq_p64:
 poly64x1_t test_vdup_laneq_p64(poly64x2_t vec) {
   return vdup_laneq_p64(vec, 0);
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 003722ef3f8..ac9e22491e6 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2272,28 +2272,39 @@ static std::string GenOpString(const std::string &name, OpKind op,
     std::string typeCode = "";
     InstructionTypeCode(typestr, ClassS, quad, typeCode);
     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
-    if (quad) {
-     s += "int16x8_t __a2 = vreinterpretq_s16_f16(__a1);\\\n";
-     s += "  vgetq_lane_s16(__a2, __b);";
-    } else {
-     s += "int16x4_t __a2 = vreinterpret_s16_f16(__a1);\\\n";
-     s += "  vget_lane_s16(__a2, __b);";
-    }
+
+    std::string intType = quad ? "int16x8_t" : "int16x4_t";
+    std::string intName = quad ? "vgetq" : "vget";
+
+    // reinterpret float16 vector as int16 vector
+    s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n";
+
+    s += "  int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n";
+
+    // reinterpret int16_t scalar as float16_t scalar
+    s += "  float16_t __a4 = *(float16_t *)(&__a3);\\\n";
+    s += "  __a4;";
     break;
   }
   case OpScalarSetLane:{
     std::string typeCode = "";
     InstructionTypeCode(typestr, ClassS, quad, typeCode);
-    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
-    if (quad) {
-     s += "  int16x8_t __b2 = vreinterpretq_s16_f16(b);\\\n";
-     s += "  int16x8_t __b3 = vsetq_lane_s16(__a1, __b2, __c);\\\n";
-     s += "  vreinterpretq_f16_s16(__b3);";
-    } else {
-     s += "  int16x4_t __b2 = vreinterpret_s16_f16(b);\\\n";
-     s += "  int16x4_t __b3 = vset_lane_s16(__a1, __b2, __c);\\\n";
-     s += "  vreinterpret_f16_s16(__b3);";
-    }
+    s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n  ";
+
+    std::string origType = quad ? "float16x8_t" : "float16x4_t";
+    std::string intType = quad ? "int16x8_t" : "int16x4_t";
+    std::string intName = quad ? "vsetq" : "vset";
+
+    // reinterpret float16_t as int16_t
+    s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n";
+    // reinterpret float16 vector as int16 vector
+    s += "  " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n";
+
+    s += "  " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n";
+
+    // reinterpret int16 vector as float16 vector
+    s += "  " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n";
+    s += "__b4;";
     break;
   }
author	Ana Pazos <apazos@codeaurora.org>	2014-02-10 21:20:53 +0000
committer	Ana Pazos <apazos@codeaurora.org>	2014-02-10 21:20:53 +0000
commit	9883d6d2b584f60bb0a91a460c349e85356500e5 (patch)
tree	969d1985d96a31a209fd0c249465914e54280ffa
parent	47e84fb675214b2b985e54b5ad72c0a27f2bf431 (diff)
download	bcm5719-llvm-9883d6d2b584f60bb0a91a460c349e85356500e5.tar.gz bcm5719-llvm-9883d6d2b584f60bb0a91a460c349e85356500e5.zip