summaryrefslogtreecommitdiffstats
path: root/clang/test/CodeGen/complex-math.c
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-10-11 00:57:18 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-10-11 00:57:18 +0000
commita216cad0fcf348b1d03a0a2e3bf6b36d0d880e85 (patch)
treeeba3b52d859da45f6cba55d30319b78c467c0168 /clang/test/CodeGen/complex-math.c
parent62de6b96b52f2da368ab8ad3bb183fbd1f0af72d (diff)
downloadbcm5719-llvm-a216cad0fcf348b1d03a0a2e3bf6b36d0d880e85.tar.gz
bcm5719-llvm-a216cad0fcf348b1d03a0a2e3bf6b36d0d880e85.zip
[complex] Teach Clang to preserve different-type operands to arithmetic
operators where one type is a C complex type, and to emit both the efficient and correct implementation for complex arithmetic according to C11 Annex G using this extra information. For both multiply and divide the old code was writing a long-hand reduced version of the math without any of the special handling of inf and NaN recommended by the standard here. Instead of putting more complexity here, this change does what GCC does, which is to emit a libcall for the fully general case. However, the old code also failed to do the proper minimization of the set of operations when there was a mixed complex and real operation. In those cases, C provides a spec for much more minimal operations that are valid. Clang now emits the exact suggested operations. This change isn't *just* about performance though; without minimizing these operations, we again lose the correct handling of infinities and NaNs. It is critical that this happen in the frontend based on asymmetric type operands to complex math operations. The performance implications of this change aren't trivial either. I've run a set of benchmarks in Eigen, an open source mathematics library that makes heavy use of complex. While a few have slowed down due to the libcall being introduced, most sped up and some by a huge amount: up to 100% and 140%. In order to make all of this work, also match the algorithm in the constant evaluator to the one in the runtime library. Currently it is a broken port of the simplifications from C's Annex G to the long-hand formulation of the algorithm. Splitting this patch up is very hard because none of this works without the AST change to preserve non-complex operands. Sorry for the enormous change. Follow-up changes will include support for sinking the libcalls onto cold paths in common cases and fastmath improvements to allow more aggressive backend folding. Differential Revision: http://reviews.llvm.org/D5698 llvm-svn: 219557
Diffstat (limited to 'clang/test/CodeGen/complex-math.c')
-rw-r--r--clang/test/CodeGen/complex-math.c367
1 files changed, 367 insertions, 0 deletions
diff --git a/clang/test/CodeGen/complex-math.c b/clang/test/CodeGen/complex-math.c
new file mode 100644
index 00000000000..a0ac6ecec3b
--- /dev/null
+++ b/clang/test/CodeGen/complex-math.c
@@ -0,0 +1,367 @@
+// RUN: %clang_cc1 %s -O1 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
+
+float _Complex add_float_rr(float a, float b) {
+ // X86-LABEL: @add_float_rr(
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // real + real: checks expect one fadd and no further fadd before ret
+}
+float _Complex add_float_cr(float _Complex a, float b) {
+ // X86-LABEL: @add_float_cr(
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // complex + real: checks expect one fadd and no further fadd before ret
+}
+float _Complex add_float_rc(float a, float _Complex b) {
+ // X86-LABEL: @add_float_rc(
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // real + complex: checks expect one fadd and no further fadd before ret
+}
+float _Complex add_float_cc(float _Complex a, float _Complex b) {
+ // X86-LABEL: @add_float_cc(
+ // X86: fadd
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // complex + complex: checks expect two fadd and no third before ret
+}
+
+float _Complex sub_float_rr(float a, float b) {
+ // X86-LABEL: @sub_float_rr(
+ // X86: fsub
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // real - real: checks expect one fsub and no further fsub before ret
+}
+float _Complex sub_float_cr(float _Complex a, float b) {
+ // X86-LABEL: @sub_float_cr(
+ // X86: fsub
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // complex - real: checks expect one fsub and no further fsub before ret
+}
+float _Complex sub_float_rc(float a, float _Complex b) {
+ // X86-LABEL: @sub_float_rc(
+ // X86: fsub
+ // X86: fsub float -0.{{0+}}e+00,
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // real - complex: checks expect two fsub, the second with -0.0 as its first operand, and no third
+}
+float _Complex sub_float_cc(float _Complex a, float _Complex b) {
+ // X86-LABEL: @sub_float_cc(
+ // X86: fsub
+ // X86: fsub
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // complex - complex: checks expect two fsub and no third before ret
+}
+
+float _Complex mul_float_rr(float a, float b) {
+ // X86-LABEL: @mul_float_rr(
+ // X86: fmul
+ // X86-NOT: fmul
+ // X86: ret
+ return a * b; // real * real: checks expect one fmul and no further fmul before ret
+}
+float _Complex mul_float_cr(float _Complex a, float b) {
+ // X86-LABEL: @mul_float_cr(
+ // X86: fmul
+ // X86: fmul
+ // X86-NOT: fmul
+ // X86: ret
+ return a * b; // complex * real: checks expect two fmul and no third before ret
+}
+float _Complex mul_float_rc(float a, float _Complex b) {
+ // X86-LABEL: @mul_float_rc(
+ // X86: fmul
+ // X86: fmul
+ // X86-NOT: fmul
+ // X86: ret
+ return a * b; // real * complex: checks expect two fmul and no third before ret
+}
+float _Complex mul_float_cc(float _Complex a, float _Complex b) {
+ // X86-LABEL: @mul_float_cc(
+ // X86-NOT: fmul
+ // X86: call <2 x float> @__mulsc3(
+ // X86: ret
+ return a * b; // complex * complex: checks expect a call to __mulsc3 with no fmul before it
+}
+
+float _Complex div_float_rr(float a, float b) {
+ // X86-LABEL: @div_float_rr(
+ // X86: fdiv
+ // X86-NOT: fdiv
+ // X86: ret
+ return a / b; // real / real: checks expect one fdiv and no further fdiv before ret
+}
+float _Complex div_float_cr(float _Complex a, float b) {
+ // X86-LABEL: @div_float_cr(
+ // X86: fdiv
+ // X86: fdiv
+ // X86-NOT: fdiv
+ // X86: ret
+ return a / b; // complex / real: checks expect two fdiv and no third before ret
+}
+float _Complex div_float_rc(float a, float _Complex b) {
+ // X86-LABEL: @div_float_rc(
+ // X86-NOT: fdiv
+ // X86: call <2 x float> @__divsc3(
+ // X86: ret
+ return a / b; // real / complex: checks expect a call to __divsc3 with no fdiv before it
+}
+float _Complex div_float_cc(float _Complex a, float _Complex b) {
+ // X86-LABEL: @div_float_cc(
+ // X86-NOT: fdiv
+ // X86: call <2 x float> @__divsc3(
+ // X86: ret
+ return a / b; // complex / complex: checks expect a call to __divsc3 with no fdiv before it
+}
+
+double _Complex add_double_rr(double a, double b) {
+ // X86-LABEL: @add_double_rr(
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // real + real: checks expect one fadd and no further fadd before ret
+}
+double _Complex add_double_cr(double _Complex a, double b) {
+ // X86-LABEL: @add_double_cr(
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // complex + real: checks expect one fadd and no further fadd before ret
+}
+double _Complex add_double_rc(double a, double _Complex b) {
+ // X86-LABEL: @add_double_rc(
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // real + complex: checks expect one fadd and no further fadd before ret
+}
+double _Complex add_double_cc(double _Complex a, double _Complex b) {
+ // X86-LABEL: @add_double_cc(
+ // X86: fadd
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // complex + complex: checks expect two fadd and no third before ret
+}
+
+double _Complex sub_double_rr(double a, double b) {
+ // X86-LABEL: @sub_double_rr(
+ // X86: fsub
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // real - real: checks expect one fsub and no further fsub before ret
+}
+double _Complex sub_double_cr(double _Complex a, double b) {
+ // X86-LABEL: @sub_double_cr(
+ // X86: fsub
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // complex - real: checks expect one fsub and no further fsub before ret
+}
+double _Complex sub_double_rc(double a, double _Complex b) {
+ // X86-LABEL: @sub_double_rc(
+ // X86: fsub
+ // X86: fsub double -0.{{0+}}e+00,
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // real - complex: checks expect two fsub, the second with -0.0 as its first operand, and no third
+}
+double _Complex sub_double_cc(double _Complex a, double _Complex b) {
+ // X86-LABEL: @sub_double_cc(
+ // X86: fsub
+ // X86: fsub
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // complex - complex: checks expect two fsub and no third before ret
+}
+
+double _Complex mul_double_rr(double a, double b) {
+ // X86-LABEL: @mul_double_rr(
+ // X86: fmul
+ // X86-NOT: fmul
+ // X86: ret
+ return a * b; // real * real: checks expect one fmul and no further fmul before ret
+}
+double _Complex mul_double_cr(double _Complex a, double b) {
+ // X86-LABEL: @mul_double_cr(
+ // X86: fmul
+ // X86: fmul
+ // X86-NOT: fmul
+ // X86: ret
+ return a * b; // complex * real: checks expect two fmul and no third before ret
+}
+double _Complex mul_double_rc(double a, double _Complex b) {
+ // X86-LABEL: @mul_double_rc(
+ // X86: fmul
+ // X86: fmul
+ // X86-NOT: fmul
+ // X86: ret
+ return a * b; // real * complex: checks expect two fmul and no third before ret
+}
+double _Complex mul_double_cc(double _Complex a, double _Complex b) {
+ // X86-LABEL: @mul_double_cc(
+ // X86-NOT: fmul
+ // X86: call { double, double } @__muldc3(
+ // X86: ret
+ return a * b; // complex * complex: checks expect a call to __muldc3 with no fmul before it
+}
+
+double _Complex div_double_rr(double a, double b) {
+ // X86-LABEL: @div_double_rr(
+ // X86: fdiv
+ // X86-NOT: fdiv
+ // X86: ret
+ return a / b; // real / real: checks expect one fdiv and no further fdiv before ret
+}
+double _Complex div_double_cr(double _Complex a, double b) {
+ // X86-LABEL: @div_double_cr(
+ // X86: fdiv
+ // X86: fdiv
+ // X86-NOT: fdiv
+ // X86: ret
+ return a / b; // complex / real: checks expect two fdiv and no third before ret
+}
+double _Complex div_double_rc(double a, double _Complex b) {
+ // X86-LABEL: @div_double_rc(
+ // X86-NOT: fdiv
+ // X86: call { double, double } @__divdc3(
+ // X86: ret
+ return a / b; // real / complex: checks expect a call to __divdc3 with no fdiv before it
+}
+double _Complex div_double_cc(double _Complex a, double _Complex b) {
+ // X86-LABEL: @div_double_cc(
+ // X86-NOT: fdiv
+ // X86: call { double, double } @__divdc3(
+ // X86: ret
+ return a / b; // complex / complex: checks expect a call to __divdc3 with no fdiv before it
+}
+
+long double _Complex add_long_double_rr(long double a, long double b) {
+ // X86-LABEL: @add_long_double_rr(
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // real + real: checks expect one fadd and no further fadd before ret
+}
+long double _Complex add_long_double_cr(long double _Complex a, long double b) {
+ // X86-LABEL: @add_long_double_cr(
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // complex + real: checks expect one fadd and no further fadd before ret
+}
+long double _Complex add_long_double_rc(long double a, long double _Complex b) {
+ // X86-LABEL: @add_long_double_rc(
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // real + complex: checks expect one fadd and no further fadd before ret
+}
+long double _Complex add_long_double_cc(long double _Complex a, long double _Complex b) {
+ // X86-LABEL: @add_long_double_cc(
+ // X86: fadd
+ // X86: fadd
+ // X86-NOT: fadd
+ // X86: ret
+ return a + b; // complex + complex: checks expect two fadd and no third before ret
+}
+
+long double _Complex sub_long_double_rr(long double a, long double b) {
+ // X86-LABEL: @sub_long_double_rr(
+ // X86: fsub
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // real - real: checks expect one fsub and no further fsub before ret
+}
+long double _Complex sub_long_double_cr(long double _Complex a, long double b) {
+ // X86-LABEL: @sub_long_double_cr(
+ // X86: fsub
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // complex - real: checks expect one fsub and no further fsub before ret
+}
+long double _Complex sub_long_double_rc(long double a, long double _Complex b) {
+ // X86-LABEL: @sub_long_double_rc(
+ // X86: fsub
+ // X86: fsub x86_fp80 0xK8{{0+}},
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // real - complex: checks expect two fsub, the second with an x86_fp80 constant 0xK8 followed by zeros (presumably -0.0; confirm) as its first operand
+}
+long double _Complex sub_long_double_cc(long double _Complex a, long double _Complex b) {
+ // X86-LABEL: @sub_long_double_cc(
+ // X86: fsub
+ // X86: fsub
+ // X86-NOT: fsub
+ // X86: ret
+ return a - b; // complex - complex: checks expect two fsub and no third before ret
+}
+
+long double _Complex mul_long_double_rr(long double a, long double b) {
+ // X86-LABEL: @mul_long_double_rr(
+ // X86: fmul
+ // X86-NOT: fmul
+ // X86: ret
+ return a * b; // real * real: checks expect one fmul and no further fmul before ret
+}
+long double _Complex mul_long_double_cr(long double _Complex a, long double b) {
+ // X86-LABEL: @mul_long_double_cr(
+ // X86: fmul
+ // X86: fmul
+ // X86-NOT: fmul
+ // X86: ret
+ return a * b; // complex * real: checks expect two fmul and no third before ret
+}
+long double _Complex mul_long_double_rc(long double a, long double _Complex b) {
+ // X86-LABEL: @mul_long_double_rc(
+ // X86: fmul
+ // X86: fmul
+ // X86-NOT: fmul
+ // X86: ret
+ return a * b; // real * complex: checks expect two fmul and no third before ret
+}
+long double _Complex mul_long_double_cc(long double _Complex a, long double _Complex b) {
+ // X86-LABEL: @mul_long_double_cc(
+ // X86-NOT: fmul
+ // X86: call { x86_fp80, x86_fp80 } @__mulxc3(
+ // X86: ret
+ return a * b; // complex * complex: checks expect a call to __mulxc3 with no fmul before it
+}
+
+long double _Complex div_long_double_rr(long double a, long double b) {
+ // X86-LABEL: @div_long_double_rr(
+ // X86: fdiv
+ // X86-NOT: fdiv
+ // X86: ret
+ return a / b; // real / real: checks expect one fdiv and no further fdiv before ret
+}
+long double _Complex div_long_double_cr(long double _Complex a, long double b) {
+ // X86-LABEL: @div_long_double_cr(
+ // X86: fdiv
+ // X86: fdiv
+ // X86-NOT: fdiv
+ // X86: ret
+ return a / b; // complex / real: checks expect two fdiv and no third before ret
+}
+long double _Complex div_long_double_rc(long double a, long double _Complex b) {
+ // X86-LABEL: @div_long_double_rc(
+ // X86-NOT: fdiv
+ // X86: call { x86_fp80, x86_fp80 } @__divxc3(
+ // X86: ret
+ return a / b; // real / complex: checks expect a call to __divxc3 with no fdiv before it
+}
+long double _Complex div_long_double_cc(long double _Complex a, long double _Complex b) {
+ // X86-LABEL: @div_long_double_cc(
+ // X86-NOT: fdiv
+ // X86: call { x86_fp80, x86_fp80 } @__divxc3(
+ // X86: ret
+ return a / b; // complex / complex: checks expect a call to __divxc3 with no fdiv before it
+}
OpenPOWER on IntegriCloud