From d23c577b0b70b34335971abaf3f50e617dda615e Mon Sep 17 00:00:00 2001 From: Khem Raj Date: Wed, 18 Mar 2015 00:01:50 +0000 Subject: [PATCH] fsl e500/e5500/e6500/603e fsqrt implementation Upstream-Status: Pending Signed-off-by: Edmar Wienskoski Signed-off-by: Khem Raj --- sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c | 134 ++++++++++++++++++ sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c | 101 +++++++++++++ sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | 134 ++++++++++++++++++ .../powerpc/powerpc32/e500mc/fpu/e_sqrtf.c | 101 +++++++++++++ sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c | 134 ++++++++++++++++++ sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | 101 +++++++++++++ sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c | 134 ++++++++++++++++++ sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | 101 +++++++++++++ sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c | 134 ++++++++++++++++++ sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | 101 +++++++++++++ sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c | 134 ++++++++++++++++++ sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | 101 +++++++++++++ .../linux/powerpc/powerpc32/603e/fpu/Implies | 1 + .../powerpc/powerpc32/e300c3/fpu/Implies | 2 + .../powerpc/powerpc32/e500mc/fpu/Implies | 1 + .../linux/powerpc/powerpc32/e5500/fpu/Implies | 1 + .../linux/powerpc/powerpc32/e6500/fpu/Implies | 1 + .../linux/powerpc/powerpc64/e5500/fpu/Implies | 1 + .../linux/powerpc/powerpc64/e6500/fpu/Implies | 1 + 19 files changed, 1418 insertions(+) create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c new file mode 100644 index 0000000000..71e516d1c8 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c @@ -0,0 +1,134 @@ +/* Double-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +static const float half = 0.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the actual square root and half of its reciprocal + simultaneously. */ + +#ifdef __STDC__ +double +__ieee754_sqrt (double b) +#else +double +__ieee754_sqrt (b) + double b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { + double y, g, h, d, r; + ieee_double_shape_type u; + + if (__builtin_expect (b != a_inf.value, 1)) + { + fenv_t fe; + + fe = fegetenv_register (); + + u.value = b; + + relax_fenv_state (); + + __asm__ ("frsqrte %[estimate], %[x]\n" + : [estimate] "=f" (y) : [x] "f" (b)); + + /* Following Muller et al, page 168, equation 5.20. + + h goes to 1/(2*sqrt(b)) + g goes to sqrt(b). + + We need three iterations to get within 1ulp. */ + + /* Indicate that these can be performed prior to the branch. GCC + insists on sinking them below the branch, however; it seems like + they'd be better before the branch so that we can cover any latency + from storing the argument and loading its high word. Oh well. */ + + g = b * y; + h = 0.5 * y; + + /* Handle small numbers by scaling. */ + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) + return __ieee754_sqrt (b * two108) * twom54; + +#define FMADD(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ + + /* Final refinement. */ + d = FNMSUB (g, g, b); + + fesetenv_register (fe); + return FMADD (d, h, g); + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_wash (b); +} diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c new file mode 100644 index 0000000000..26fa067abf --- /dev/null +++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c @@ -0,0 +1,101 @@ +/* Single-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float threehalf = 1.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the reciprocal square root and use that to compute the actual + square root. */ + +#ifdef __STDC__ +float +__ieee754_sqrtf (float b) +#else +float +__ieee754_sqrtf (b) + float b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { +#define FMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + if (__builtin_expect (b != a_inf.value, 1)) + { + double y, x; + fenv_t fe; + + fe = fegetenv_register (); + + relax_fenv_state (); + + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ + y = FMSUB (threehalf, b, b); + + /* Initial estimate. */ + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); + + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + + /* All done. */ + fesetenv_register (fe); + return x * b; + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_washf (b); +} diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c new file mode 100644 index 0000000000..71e516d1c8 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c @@ -0,0 +1,134 @@ +/* Double-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +static const float half = 0.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the actual square root and half of its reciprocal + simultaneously. */ + +#ifdef __STDC__ +double +__ieee754_sqrt (double b) +#else +double +__ieee754_sqrt (b) + double b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { + double y, g, h, d, r; + ieee_double_shape_type u; + + if (__builtin_expect (b != a_inf.value, 1)) + { + fenv_t fe; + + fe = fegetenv_register (); + + u.value = b; + + relax_fenv_state (); + + __asm__ ("frsqrte %[estimate], %[x]\n" + : [estimate] "=f" (y) : [x] "f" (b)); + + /* Following Muller et al, page 168, equation 5.20. + + h goes to 1/(2*sqrt(b)) + g goes to sqrt(b). + + We need three iterations to get within 1ulp. */ + + /* Indicate that these can be performed prior to the branch. GCC + insists on sinking them below the branch, however; it seems like + they'd be better before the branch so that we can cover any latency + from storing the argument and loading its high word. Oh well. */ + + g = b * y; + h = 0.5 * y; + + /* Handle small numbers by scaling. */ + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) + return __ieee754_sqrt (b * two108) * twom54; + +#define FMADD(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ + + /* Final refinement. */ + d = FNMSUB (g, g, b); + + fesetenv_register (fe); + return FMADD (d, h, g); + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_wash (b); +} diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c new file mode 100644 index 0000000000..26fa067abf --- /dev/null +++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c @@ -0,0 +1,101 @@ +/* Single-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float threehalf = 1.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the reciprocal square root and use that to compute the actual + square root. */ + +#ifdef __STDC__ +float +__ieee754_sqrtf (float b) +#else +float +__ieee754_sqrtf (b) + float b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { +#define FMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + if (__builtin_expect (b != a_inf.value, 1)) + { + double y, x; + fenv_t fe; + + fe = fegetenv_register (); + + relax_fenv_state (); + + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ + y = FMSUB (threehalf, b, b); + + /* Initial estimate. */ + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); + + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + + /* All done. */ + fesetenv_register (fe); + return x * b; + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_washf (b); +} diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c new file mode 100644 index 0000000000..71e516d1c8 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c @@ -0,0 +1,134 @@ +/* Double-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +static const float half = 0.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the actual square root and half of its reciprocal + simultaneously. */ + +#ifdef __STDC__ +double +__ieee754_sqrt (double b) +#else +double +__ieee754_sqrt (b) + double b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { + double y, g, h, d, r; + ieee_double_shape_type u; + + if (__builtin_expect (b != a_inf.value, 1)) + { + fenv_t fe; + + fe = fegetenv_register (); + + u.value = b; + + relax_fenv_state (); + + __asm__ ("frsqrte %[estimate], %[x]\n" + : [estimate] "=f" (y) : [x] "f" (b)); + + /* Following Muller et al, page 168, equation 5.20. + + h goes to 1/(2*sqrt(b)) + g goes to sqrt(b). + + We need three iterations to get within 1ulp. */ + + /* Indicate that these can be performed prior to the branch. GCC + insists on sinking them below the branch, however; it seems like + they'd be better before the branch so that we can cover any latency + from storing the argument and loading its high word. Oh well. */ + + g = b * y; + h = 0.5 * y; + + /* Handle small numbers by scaling. */ + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) + return __ieee754_sqrt (b * two108) * twom54; + +#define FMADD(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ + + /* Final refinement. */ + d = FNMSUB (g, g, b); + + fesetenv_register (fe); + return FMADD (d, h, g); + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_wash (b); +} diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c new file mode 100644 index 0000000000..26fa067abf --- /dev/null +++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c @@ -0,0 +1,101 @@ +/* Single-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float threehalf = 1.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the reciprocal square root and use that to compute the actual + square root. */ + +#ifdef __STDC__ +float +__ieee754_sqrtf (float b) +#else +float +__ieee754_sqrtf (b) + float b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { +#define FMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + if (__builtin_expect (b != a_inf.value, 1)) + { + double y, x; + fenv_t fe; + + fe = fegetenv_register (); + + relax_fenv_state (); + + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ + y = FMSUB (threehalf, b, b); + + /* Initial estimate. */ + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); + + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + + /* All done. */ + fesetenv_register (fe); + return x * b; + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_washf (b); +} diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c new file mode 100644 index 0000000000..71e516d1c8 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c @@ -0,0 +1,134 @@ +/* Double-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +static const float half = 0.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the actual square root and half of its reciprocal + simultaneously. */ + +#ifdef __STDC__ +double +__ieee754_sqrt (double b) +#else +double +__ieee754_sqrt (b) + double b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { + double y, g, h, d, r; + ieee_double_shape_type u; + + if (__builtin_expect (b != a_inf.value, 1)) + { + fenv_t fe; + + fe = fegetenv_register (); + + u.value = b; + + relax_fenv_state (); + + __asm__ ("frsqrte %[estimate], %[x]\n" + : [estimate] "=f" (y) : [x] "f" (b)); + + /* Following Muller et al, page 168, equation 5.20. + + h goes to 1/(2*sqrt(b)) + g goes to sqrt(b). + + We need three iterations to get within 1ulp. */ + + /* Indicate that these can be performed prior to the branch. GCC + insists on sinking them below the branch, however; it seems like + they'd be better before the branch so that we can cover any latency + from storing the argument and loading its high word. Oh well. */ + + g = b * y; + h = 0.5 * y; + + /* Handle small numbers by scaling. */ + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) + return __ieee754_sqrt (b * two108) * twom54; + +#define FMADD(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ + + /* Final refinement. */ + d = FNMSUB (g, g, b); + + fesetenv_register (fe); + return FMADD (d, h, g); + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_wash (b); +} diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c new file mode 100644 index 0000000000..26fa067abf --- /dev/null +++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c @@ -0,0 +1,101 @@ +/* Single-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float threehalf = 1.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the reciprocal square root and use that to compute the actual + square root. */ + +#ifdef __STDC__ +float +__ieee754_sqrtf (float b) +#else +float +__ieee754_sqrtf (b) + float b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { +#define FMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + if (__builtin_expect (b != a_inf.value, 1)) + { + double y, x; + fenv_t fe; + + fe = fegetenv_register (); + + relax_fenv_state (); + + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ + y = FMSUB (threehalf, b, b); + + /* Initial estimate. */ + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); + + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + + /* All done. */ + fesetenv_register (fe); + return x * b; + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_washf (b); +} diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c new file mode 100644 index 0000000000..71e516d1c8 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c @@ -0,0 +1,134 @@ +/* Double-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +static const float half = 0.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the actual square root and half of its reciprocal + simultaneously. */ + +#ifdef __STDC__ +double +__ieee754_sqrt (double b) +#else +double +__ieee754_sqrt (b) + double b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { + double y, g, h, d, r; + ieee_double_shape_type u; + + if (__builtin_expect (b != a_inf.value, 1)) + { + fenv_t fe; + + fe = fegetenv_register (); + + u.value = b; + + relax_fenv_state (); + + __asm__ ("frsqrte %[estimate], %[x]\n" + : [estimate] "=f" (y) : [x] "f" (b)); + + /* Following Muller et al, page 168, equation 5.20. + + h goes to 1/(2*sqrt(b)) + g goes to sqrt(b). + + We need three iterations to get within 1ulp. */ + + /* Indicate that these can be performed prior to the branch. GCC + insists on sinking them below the branch, however; it seems like + they'd be better before the branch so that we can cover any latency + from storing the argument and loading its high word. Oh well. */ + + g = b * y; + h = 0.5 * y; + + /* Handle small numbers by scaling. */ + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) + return __ieee754_sqrt (b * two108) * twom54; + +#define FMADD(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ + + /* Final refinement. */ + d = FNMSUB (g, g, b); + + fesetenv_register (fe); + return FMADD (d, h, g); + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_wash (b); +} diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c new file mode 100644 index 0000000000..26fa067abf --- /dev/null +++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c @@ -0,0 +1,101 @@ +/* Single-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float threehalf = 1.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the reciprocal square root and use that to compute the actual + square root. */ + +#ifdef __STDC__ +float +__ieee754_sqrtf (float b) +#else +float +__ieee754_sqrtf (b) + float b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { +#define FMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + if (__builtin_expect (b != a_inf.value, 1)) + { + double y, x; + fenv_t fe; + + fe = fegetenv_register (); + + relax_fenv_state (); + + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ + y = FMSUB (threehalf, b, b); + + /* Initial estimate. */ + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); + + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + + /* All done. */ + fesetenv_register (fe); + return x * b; + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_washf (b); +} diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c new file mode 100644 index 0000000000..71e516d1c8 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c @@ -0,0 +1,134 @@ +/* Double-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +static const float half = 0.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the actual square root and half of its reciprocal + simultaneously. */ + +#ifdef __STDC__ +double +__ieee754_sqrt (double b) +#else +double +__ieee754_sqrt (b) + double b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { + double y, g, h, d, r; + ieee_double_shape_type u; + + if (__builtin_expect (b != a_inf.value, 1)) + { + fenv_t fe; + + fe = fegetenv_register (); + + u.value = b; + + relax_fenv_state (); + + __asm__ ("frsqrte %[estimate], %[x]\n" + : [estimate] "=f" (y) : [x] "f" (b)); + + /* Following Muller et al, page 168, equation 5.20. + + h goes to 1/(2*sqrt(b)) + g goes to sqrt(b). + + We need three iterations to get within 1ulp. */ + + /* Indicate that these can be performed prior to the branch. GCC + insists on sinking them below the branch, however; it seems like + they'd be better before the branch so that we can cover any latency + from storing the argument and loading its high word. Oh well. */ + + g = b * y; + h = 0.5 * y; + + /* Handle small numbers by scaling. */ + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) + return __ieee754_sqrt (b * two108) * twom54; + +#define FMADD(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ + + /* Final refinement. */ + d = FNMSUB (g, g, b); + + fesetenv_register (fe); + return FMADD (d, h, g); + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_wash (b); +} diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c new file mode 100644 index 0000000000..26fa067abf --- /dev/null +++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c @@ -0,0 +1,101 @@ +/* Single-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float threehalf = 1.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the reciprocal square root and use that to compute the actual + square root. */ + +#ifdef __STDC__ +float +__ieee754_sqrtf (float b) +#else +float +__ieee754_sqrtf (b) + float b; +#endif +{ + if (__builtin_expect (b > 0, 1)) + { +#define FMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + if (__builtin_expect (b != a_inf.value, 1)) + { + double y, x; + fenv_t fe; + + fe = fegetenv_register (); + + relax_fenv_state (); + + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ + y = FMSUB (threehalf, b, b); + + /* Initial estimate. */ + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); + + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + + /* All done. */ + fesetenv_register (fe); + return x * b; + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_washf (b); +} diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies new file mode 100644 index 0000000000..b103b4dea5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/603e/fpu diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies new file mode 100644 index 0000000000..64db17fada --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies @@ -0,0 +1,2 @@ +# e300c3 is a variant of 603e so use the same optimizations for sqrt +powerpc/powerpc32/603e/fpu diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies new file mode 100644 index 0000000000..7eac5fcf02 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/e500mc/fpu diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies new file mode 100644 index 0000000000..264b2a7700 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/e5500/fpu diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies new file mode 100644 index 0000000000..a25934467b --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/e6500/fpu diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies new file mode 100644 index 0000000000..a7bc854be8 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/e5500/fpu diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies new file mode 100644 index 0000000000..04ff8cc181 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/e6500/fpu