summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ana Pazos <apazos@codeaurora.org> 2014-02-10 21:20:53 +0000
committer: Ana Pazos <apazos@codeaurora.org> 2014-02-10 21:20:53 +0000
commit: 9883d6d2b584f60bb0a91a460c349e85356500e5 (patch)
tree: 969d1985d96a31a209fd0c249465914e54280ffa
parent: 47e84fb675214b2b985e54b5ad72c0a27f2bf431 (diff)
downloadbcm5719-llvm-9883d6d2b584f60bb0a91a460c349e85356500e5.tar.gz
bcm5719-llvm-9883d6d2b584f60bb0a91a460c349e85356500e5.zip
[AArch64] Fixed vget/vset_lane_f16 implementation
Replaced the cast and vreinterpret operations with code that bitwise-reinterprets values between the float16_t and int16_t types (and their vector counterparts). llvm-svn: 201112
-rw-r--r-- clang/test/CodeGen/aarch64-neon-copy.c 93
-rw-r--r-- clang/utils/TableGen/NeonEmitter.cpp 45
2 files changed, 99 insertions, 39 deletions
diff --git a/clang/test/CodeGen/aarch64-neon-copy.c b/clang/test/CodeGen/aarch64-neon-copy.c
index ebdf4ce85fe..f0de598906c 100644
--- a/clang/test/CodeGen/aarch64-neon-copy.c
+++ b/clang/test/CodeGen/aarch64-neon-copy.c
@@ -1244,79 +1244,128 @@ float64x2_t test_vcopyq_laneq_f64(float64x2_t a, float64x2_t c) {
return vcopyq_laneq_f64(a, 1, c, 1);
}
-// CHECK: test_vget_lane_f16
+// CHECK-LABEL: test_vget_lane_f16
int test_vget_lane_f16(float16x4_t v1) {
float16_t a = vget_lane_f16(v1, 3);
return (int)a;
// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
}
-// CHECK: test_vgetq_lane_f16
+// CHECK-LABEL: test_vgetq_lane_f16
int test_vgetq_lane_f16(float16x8_t v1) {
float16_t a = vgetq_lane_f16(v1, 7);
return (int)a;
// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
}
-// CHECK: test_vget_lane_f16_2
-float test_vget_lane_f16_2(float16x4_t v1) {
+// CHECK-LABEL: test2_vget_lane_f16
+float test2_vget_lane_f16(float16x4_t v1) {
float16_t a = vget_lane_f16(v1, 3);
return (float)a;
// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
}
-// CHECK: test_vgetq_lane_f16_2
-float test_vgetq_lane_f16_2(float16x8_t v1) {
+// CHECK-LABEL: test2_vgetq_lane_f16
+float test2_vgetq_lane_f16(float16x8_t v1) {
float16_t a = vgetq_lane_f16(v1, 7);
return (float)a;
// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
}
-// CHECK: test_vset_lane_f16
+// CHECK-LABEL: test_vset_lane_f16
float16x4_t test_vset_lane_f16(float16x4_t v1) {
- float16_t a;
+ float16_t a = 0.0;
return vset_lane_f16(a, v1, 3);
-// CHECK: fmov {{s[0-9]+}}, wzr
-// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0]
+// CHECK: ins {{v[0-9]+}}.h[3], wzr
}
-// CHECK: test_vsetq_lane_f16
+// CHECK-LABEL: test_vsetq_lane_f16
float16x8_t test_vsetq_lane_f16(float16x8_t v1) {
- float16_t a;
+ float16_t a = 0.0;
return vsetq_lane_f16(a, v1, 7);
-// CHECK: fmov {{s[0-9]+}}, wzr
-// CHECK-NEXT: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0]
+// CHECK: ins {{v[0-9]+}}.h[7], wzr
}
-// CHECK: test_vset_lane_f16_2
-float16x4_t test_vset_lane_f16_2(float16x4_t v1) {
+// CHECK-LABEL: test2_vset_lane_f16
+float16x4_t test2_vset_lane_f16(float16x4_t v1) {
+ float16_t a = 1.0;
+ return vset_lane_f16(a, v1, 3);
+// CHECK: movz {{w[0-9]+}}, #15360
+// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test2_vsetq_lane_f16
+float16x8_t test2_vsetq_lane_f16(float16x8_t v1) {
+ float16_t a = 1.0;
+ return vsetq_lane_f16(a, v1, 7);
+// CHECK: movz {{w[0-9]+}}, #15360
+// CHECK-NEXT: ins {{v[0-9]+}}.h[7], {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test_vget_vset_lane_f16
+float16x4_t test_vget_vset_lane_f16(float16x4_t v1) {
float16_t a = vget_lane_f16(v1, 0);
return vset_lane_f16(a, v1, 3);
// CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0]
}
-// CHECK: test_vsetq_lane_f16_2
-float16x8_t test_vsetq_lane_f16_2(float16x8_t v1) {
+// CHECK-LABEL: test_vgetq_vsetq_lane_f16
+float16x8_t test_vgetq_vsetq_lane_f16(float16x8_t v1) {
float16_t a = vgetq_lane_f16(v1, 0);
return vsetq_lane_f16(a, v1, 7);
// CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0]
}
+// CHECK-LABEL: test4_vset_lane_f16
+float16x4_t test4_vset_lane_f16(float16x4_t v1, float b, float c) {
+ float16_t a = (float16_t)b;
+ return vset_lane_f16(a, v1, 3);
+// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
+// CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+}
-// CHECK: test_vsetq_lane_f16_3
-float16x8_t test_vsetq_lane_f16_3(float16x8_t v1, float b, float c) {
+// CHECK-LABEL: test4_vsetq_lane_f16
+float16x8_t test4_vsetq_lane_f16(float16x8_t v1, float b, float c) {
float16_t a = (float16_t)b;
return vsetq_lane_f16(a, v1, 7);
+// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}}
}
-// CHECK: test_vsetq_lane_f16_4
-float16x8_t test_vsetq_lane_f16_4(float16x8_t v1, float b, float c) {
+// CHECK-LABEL: test5_vset_lane_f16
+float16x4_t test5_vset_lane_f16(float16x4_t v1, float b, float c) {
+ float16_t a = (float16_t)b;
+ return vset_lane_f16(a, v1, 3);
+// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
+// CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test5_vsetq_lane_f16
+float16x8_t test5_vsetq_lane_f16(float16x8_t v1, float b, float c) {
float16_t a = (float16_t)b + 1.0;
return vsetq_lane_f16(a, v1, 7);
+// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}}
}
+// CHECK-LABEL: test_vset_vget_lane_f16
+int test_vset_vget_lane_f16(float16x4_t a) {
+ float16x4_t b;
+ b = vset_lane_f16(3.5, a, 3);
+ float16_t c = vget_lane_f16(b, 3);
+ return (int)c;
+// CHECK: movz x{{[0-9]+}}, #3
+}
+
+// CHECK-LABEL: test_vsetq_vgetq_lane_f16
+int test_vsetq_vgetq_lane_f16(float16x8_t a) {
+ float16x8_t b;
+ b = vsetq_lane_f16(3.5, a, 5);
+ float16_t c = vgetq_lane_f16(b, 5);
+ return (int)c;
+// CHECK: movz x{{[0-9]+}}, #3
+}
+
// CHECK-LABEL: test_vdup_laneq_p64:
poly64x1_t test_vdup_laneq_p64(poly64x2_t vec) {
return vdup_laneq_p64(vec, 0);
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 003722ef3f8..ac9e22491e6 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2272,28 +2272,39 @@ static std::string GenOpString(const std::string &name, OpKind op,
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- if (quad) {
- s += "int16x8_t __a2 = vreinterpretq_s16_f16(__a1);\\\n";
- s += " vgetq_lane_s16(__a2, __b);";
- } else {
- s += "int16x4_t __a2 = vreinterpret_s16_f16(__a1);\\\n";
- s += " vget_lane_s16(__a2, __b);";
- }
+
+ std::string intType = quad ? "int16x8_t" : "int16x4_t";
+ std::string intName = quad ? "vgetq" : "vget";
+
+ // reinterpret float16 vector as int16 vector
+ s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n";
+
+ s += " int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n";
+
+ // reinterpret int16_t scalar as float16_t scalar
+ s += " float16_t __a4 = *(float16_t *)(&__a3);\\\n";
+ s += " __a4;";
break;
}
case OpScalarSetLane:{
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- if (quad) {
- s += " int16x8_t __b2 = vreinterpretq_s16_f16(b);\\\n";
- s += " int16x8_t __b3 = vsetq_lane_s16(__a1, __b2, __c);\\\n";
- s += " vreinterpretq_f16_s16(__b3);";
- } else {
- s += " int16x4_t __b2 = vreinterpret_s16_f16(b);\\\n";
- s += " int16x4_t __b3 = vset_lane_s16(__a1, __b2, __c);\\\n";
- s += " vreinterpret_f16_s16(__b3);";
- }
+ s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n ";
+
+ std::string origType = quad ? "float16x8_t" : "float16x4_t";
+ std::string intType = quad ? "int16x8_t" : "int16x4_t";
+ std::string intName = quad ? "vsetq" : "vset";
+
+ // reinterpret float16_t as int16_t
+ s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n";
+ // reinterpret float16 vector as int16 vector
+ s += " " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n";
+
+ s += " " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n";
+
+ // reinterpret int16 vector as float16 vector
+ s += " " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n";
+ s += "__b4;";
break;
}
OpenPOWER on IntegriCloud