diff options
| author | Ana Pazos <apazos@codeaurora.org> | 2014-02-10 21:20:53 +0000 |
|---|---|---|
| committer | Ana Pazos <apazos@codeaurora.org> | 2014-02-10 21:20:53 +0000 |
| commit | 9883d6d2b584f60bb0a91a460c349e85356500e5 (patch) | |
| tree | 969d1985d96a31a209fd0c249465914e54280ffa | |
| parent | 47e84fb675214b2b985e54b5ad72c0a27f2bf431 (diff) | |
| download | bcm5719-llvm-9883d6d2b584f60bb0a91a460c349e85356500e5.tar.gz bcm5719-llvm-9883d6d2b584f60bb0a91a460c349e85356500e5.zip | |
[AArch64] Fixed vget/vset_lane_f16 implementation
Replaced cast and vreinterpret operations with
code that reinterprets values bitwise between the types float16_t and
int16_t.
llvm-svn: 201112
| -rw-r--r-- | clang/test/CodeGen/aarch64-neon-copy.c | 93 | ||||
| -rw-r--r-- | clang/utils/TableGen/NeonEmitter.cpp | 45 |
2 files changed, 99 insertions, 39 deletions
diff --git a/clang/test/CodeGen/aarch64-neon-copy.c b/clang/test/CodeGen/aarch64-neon-copy.c index ebdf4ce85fe..f0de598906c 100644 --- a/clang/test/CodeGen/aarch64-neon-copy.c +++ b/clang/test/CodeGen/aarch64-neon-copy.c @@ -1244,79 +1244,128 @@ float64x2_t test_vcopyq_laneq_f64(float64x2_t a, float64x2_t c) { return vcopyq_laneq_f64(a, 1, c, 1); } -// CHECK: test_vget_lane_f16 +// CHECK-LABEL: test_vget_lane_f16 int test_vget_lane_f16(float16x4_t v1) { float16_t a = vget_lane_f16(v1, 3); return (int)a; // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] } -// CHECK: test_vgetq_lane_f16 +// CHECK-LABEL: test_vgetq_lane_f16 int test_vgetq_lane_f16(float16x8_t v1) { float16_t a = vgetq_lane_f16(v1, 7); return (int)a; // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] } -// CHECK: test_vget_lane_f16_2 -float test_vget_lane_f16_2(float16x4_t v1) { +// CHECK-LABEL: test2_vget_lane_f16 +float test2_vget_lane_f16(float16x4_t v1) { float16_t a = vget_lane_f16(v1, 3); return (float)a; // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] } -// CHECK: test_vgetq_lane_f16_2 -float test_vgetq_lane_f16_2(float16x8_t v1) { +// CHECK-LABEL: test2_vgetq_lane_f16 +float test2_vgetq_lane_f16(float16x8_t v1) { float16_t a = vgetq_lane_f16(v1, 7); return (float)a; // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] } -// CHECK: test_vset_lane_f16 +// CHECK-LABEL: test_vset_lane_f16 float16x4_t test_vset_lane_f16(float16x4_t v1) { - float16_t a; + float16_t a = 0.0; return vset_lane_f16(a, v1, 3); -// CHECK: fmov {{s[0-9]+}}, wzr -// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0] +// CHECK: ins {{v[0-9]+}}.h[3], wzr } -// CHECK: test_vsetq_lane_f16 +// CHECK-LABEL: test_vsetq_lane_f16 float16x8_t test_vsetq_lane_f16(float16x8_t v1) { - float16_t a; + float16_t a = 0.0; return vsetq_lane_f16(a, v1, 7); -// CHECK: fmov {{s[0-9]+}}, wzr -// CHECK-NEXT: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0] +// CHECK: ins {{v[0-9]+}}.h[7], wzr } -// CHECK: test_vset_lane_f16_2 -float16x4_t test_vset_lane_f16_2(float16x4_t v1) { +// 
CHECK-LABEL: test2_vset_lane_f16 +float16x4_t test2_vset_lane_f16(float16x4_t v1) { + float16_t a = 1.0; + return vset_lane_f16(a, v1, 3); +// CHECK: movz {{w[0-9]+}}, #15360 +// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{w[0-9]+}} +} + +// CHECK-LABEL: test2_vsetq_lane_f16 +float16x8_t test2_vsetq_lane_f16(float16x8_t v1) { + float16_t a = 1.0; + return vsetq_lane_f16(a, v1, 7); +// CHECK: movz {{w[0-9]+}}, #15360 +// CHECK-NEXT: ins {{v[0-9]+}}.h[7], {{w[0-9]+}} +} + +// CHECK-LABEL: test_vget_vset_lane_f16 +float16x4_t test_vget_vset_lane_f16(float16x4_t v1) { float16_t a = vget_lane_f16(v1, 0); return vset_lane_f16(a, v1, 3); // CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0] } -// CHECK: test_vsetq_lane_f16_2 -float16x8_t test_vsetq_lane_f16_2(float16x8_t v1) { +// CHECK-LABEL: test_vgetq_vsetq_lane_f16 +float16x8_t test_vgetq_vsetq_lane_f16(float16x8_t v1) { float16_t a = vgetq_lane_f16(v1, 0); return vsetq_lane_f16(a, v1, 7); // CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0] } +// CHECK-LABEL: test4_vset_lane_f16 +float16x4_t test4_vset_lane_f16(float16x4_t v1, float b, float c) { + float16_t a = (float16_t)b; + return vset_lane_f16(a, v1, 3); +// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}} +// CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}} +} -// CHECK: test_vsetq_lane_f16_3 -float16x8_t test_vsetq_lane_f16_3(float16x8_t v1, float b, float c) { +// CHECK-LABEL: test4_vsetq_lane_f16 +float16x8_t test4_vsetq_lane_f16(float16x8_t v1, float b, float c) { float16_t a = (float16_t)b; return vsetq_lane_f16(a, v1, 7); +// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}} // CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}} } -// CHECK: test_vsetq_lane_f16_4 -float16x8_t test_vsetq_lane_f16_4(float16x8_t v1, float b, float c) { +// CHECK-LABEL: test5_vset_lane_f16 +float16x4_t test5_vset_lane_f16(float16x4_t v1, float b, float c) { + float16_t a = (float16_t)b; + return vset_lane_f16(a, v1, 3); +// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}} +// CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}} +} + +// CHECK-LABEL: 
test5_vsetq_lane_f16 +float16x8_t test5_vsetq_lane_f16(float16x8_t v1, float b, float c) { float16_t a = (float16_t)b + 1.0; return vsetq_lane_f16(a, v1, 7); +// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}} // CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}} } +// CHECK-LABEL: test_vset_vget_lane_f16 +int test_vset_vget_lane_f16(float16x4_t a) { + float16x4_t b; + b = vset_lane_f16(3.5, a, 3); + float16_t c = vget_lane_f16(b, 3); + return (int)c; +// CHECK: movz x{{[0-9]+}}, #3 +} + +// CHECK-LABEL: test_vsetq_vgetq_lane_f16 +int test_vsetq_vgetq_lane_f16(float16x8_t a) { + float16x8_t b; + b = vsetq_lane_f16(3.5, a, 5); + float16_t c = vgetq_lane_f16(b, 5); + return (int)c; +// CHECK: movz x{{[0-9]+}}, #3 +} + // CHECK-LABEL: test_vdup_laneq_p64: poly64x1_t test_vdup_laneq_p64(poly64x2_t vec) { return vdup_laneq_p64(vec, 0); diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 003722ef3f8..ac9e22491e6 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -2272,28 +2272,39 @@ static std::string GenOpString(const std::string &name, OpKind op, std::string typeCode = ""; InstructionTypeCode(typestr, ClassS, quad, typeCode); s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; - if (quad) { - s += "int16x8_t __a2 = vreinterpretq_s16_f16(__a1);\\\n"; - s += " vgetq_lane_s16(__a2, __b);"; - } else { - s += "int16x4_t __a2 = vreinterpret_s16_f16(__a1);\\\n"; - s += " vget_lane_s16(__a2, __b);"; - } + + std::string intType = quad ? "int16x8_t" : "int16x4_t"; + std::string intName = quad ? 
"vgetq" : "vget"; + + // reinterpret float16 vector as int16 vector + s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n"; + + s += " int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n"; + + // reinterpret int16 vector as float16 vector + s += " float16_t __a4 = *(float16_t *)(&__a3);\\\n"; + s += " __a4;"; break; } case OpScalarSetLane:{ std::string typeCode = ""; InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; - if (quad) { - s += " int16x8_t __b2 = vreinterpretq_s16_f16(b);\\\n"; - s += " int16x8_t __b3 = vsetq_lane_s16(__a1, __b2, __c);\\\n"; - s += " vreinterpretq_f16_s16(__b3);"; - } else { - s += " int16x4_t __b2 = vreinterpret_s16_f16(b);\\\n"; - s += " int16x4_t __b3 = vset_lane_s16(__a1, __b2, __c);\\\n"; - s += " vreinterpret_f16_s16(__b3);"; - } + s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n "; + + std::string origType = quad ? "float16x8_t" : "float16x4_t"; + std::string intType = quad ? "int16x8_t" : "int16x4_t"; + std::string intName = quad ? "vsetq" : "vset"; + + // reinterpret float16_t as int16_t + s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n"; + // reinterpret float16 vector as int16 vector + s += " " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n"; + + s += " " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n"; + + // reinterpret int16 vector as float16 vector + s += " " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n"; + s += "__b4;"; break; } |

