libstdc++
simd_math.h
1 // Math overloads for simd -*- C++ -*-
2 
3 // Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #include <utility>
31 #include <iomanip>
32 
33 _GLIBCXX_SIMD_BEGIN_NAMESPACE
// Alias: a fixed_size_simd of value type _Tp with as many elements as the
// simd (or simd_mask) type _V.
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;
36 
// _Math_return_type {{{
// Maps the return type of the scalar <cmath> function (called with double
// arguments) to the corresponding simd return type:
//   double -> simd<_Tp, _Abi>               (value-returning math functions)
//   bool   -> simd_mask<_Tp, _Abi>          (classification functions)
//   other  -> fixed_size_simd<_DoubleR, N>  (e.g. functions returning int)
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t =
    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

// primary template: any other scalar return type _DoubleR
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };

//}}}
// _GLIBCXX_SIMD_MATH_CALL_ {{{
// Generates the one-argument simd overload of the math function __name.
// The return type is derived from the scalar std::__name(double) via
// _Math_return_type_t; the body forwards to the ABI-specific hook
// _Abi::_SimdImpl::_S_<name>.  Constrained to floating-point value types.
#define _GLIBCXX_SIMD_MATH_CALL_(__name)                                     \
template <typename _Tp, typename _Abi, typename...,                          \
          typename _R = _Math_return_type_t<                                 \
            decltype(std::__name(declval<double>())), _Tp, _Abi>>            \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                  \
  __name(simd<_Tp, _Abi> __x)                                                \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
66 
67 // }}}
//_Extra_argument_type{{{
// Describes the extra (second/third) parameter of a multi-argument math
// function.  _Up is the scalar parameter type of the <cmath> function:
//   _Tp* -> simd<_Tp, _Abi>* out-parameter (e.g. modf)
//   _Up* -> fixed_size_simd<_Up, N>* integral out-parameter (e.g. frexp)
//   _Tp  -> simd<_Tp, _Abi> value parameter (e.g. atan2)
//   _Up  -> fixed_size_simd<_Up, N> integral value parameter (e.g. ldexp)
// declval() mirrors the scalar argument type for return-type deduction;
// _S_data unwraps the simd object to the implementation's data
// representation.
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

// pointer to the same floating-point type: simd out-parameter
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    // the scalar fallback must store through a temporary scalar first
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// pointer to a different (integral) type: fixed_size out-parameter
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// same floating-point type by value: plain simd parameter
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

// primary template: integral value parameter as fixed_size_simd
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

//}}}
// _GLIBCXX_SIMD_MATH_CALL2_ {{{
// Generates two-argument simd overloads of __name.  The first overload takes
// simd plus the _Extra_argument_type-mapped second parameter.  The second
// overload accepts anything convertible to simd<_Tp, _Abi> (but not a simd
// itself) as the FIRST argument when __arg2 is _Tp, converting it and
// re-dispatching to the first overload.
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, __arg2)                            \
template <                                                                   \
  typename _Tp, typename _Abi, typename...,                                  \
  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>,                  \
  typename _R = _Math_return_type_t<                                         \
    decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>>  \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                  \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y)        \
  {                                                                          \
    return {__private_init,                                                  \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \
  }                                                                          \
template <typename _Up, typename _Tp, typename _Abi>                         \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t<                               \
    decltype(std::__name(                                                    \
      declval<double>(),                                                     \
      declval<enable_if_t<                                                   \
        conjunction_v<                                                       \
          is_same<__arg2, _Tp>,                                              \
          negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>,         \
          is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>,     \
        double>>())),                                                        \
    _Tp, _Abi>                                                               \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy)                            \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }
147 
148 // }}}
// _GLIBCXX_SIMD_MATH_CALL3_ {{{
// Generates three-argument simd overloads of __name (e.g. fma).  The first
// overload takes simd plus two _Extra_argument_type-mapped parameters.  The
// second accepts three arguments convertible to a common simd type (deduced
// from whichever of the 2nd/3rd arguments already is a simd), excluded when
// the first argument is itself a floating-point simd (which the first
// overload already handles).
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, __arg2, __arg3)                    \
template <typename _Tp, typename _Abi, typename...,                          \
          typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>,          \
          typename _Arg3 = _Extra_argument_type<__arg3, _Tp, _Abi>,          \
          typename _R = _Math_return_type_t<                                 \
            decltype(std::__name(declval<double>(), _Arg2::declval(),        \
                                 _Arg3::declval())),                         \
            _Tp, _Abi>>                                                      \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                  \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y,        \
         const typename _Arg3::type& __z)                                    \
  {                                                                          \
    return {__private_init,                                                  \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y),   \
                                         _Arg3::_S_data(__z))};              \
  }                                                                          \
template <                                                                   \
  typename _T0, typename _T1, typename _T2, typename...,                     \
  typename _U0 = __remove_cvref_t<_T0>,                                      \
  typename _U1 = __remove_cvref_t<_T1>,                                      \
  typename _U2 = __remove_cvref_t<_T2>,                                      \
  typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>,                  \
  typename = enable_if_t<conjunction_v<                                      \
    is_simd<_Simd>, is_convertible<_T0&&, _Simd>,                            \
    is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>,              \
    negation<conjunction<                                                    \
      is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>>  \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(),           \
                                          declval<const _Simd&>(),           \
                                          declval<const _Simd&>()))          \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz)                                 \
  {                                                                          \
    return __name(_Simd(static_cast<_T0&&>(__xx)),                           \
                  _Simd(static_cast<_T1&&>(__yy)),                           \
                  _Simd(static_cast<_T2&&>(__zz)));                          \
  }
186 
187 // }}}
// __cosSeries {{{
// Polynomial approximation of cos for the folded argument (|x| <= pi/4;
// see __fold_input).  Horner evaluation over x^2 with tuned coefficients
// (comments give the Taylor term each coefficient approximates).
// NOTE: keep the floating-point evaluation order exactly as written;
// reassociating changes the rounding and thus the result.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  //  1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }
200 
// Double-precision variant: same Horner scheme with more terms (up to the
// 1/16! coefficient).  Evaluation order is significant — do not reassociate.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __cosSeries(const simd<double, _Abi>& __x)
  {
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = 0x1.AC00000000000p-45;              //  1/16!
    __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
    __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
    __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
    __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
    // .5f and 1.f are exactly representable; promotion to double is exact
    return (__y * __x2 - .5f) * __x2 + 1.f;
  }
216 
217 // }}}
// __sinSeries {{{
// Polynomial approximation of sin for the folded argument (|x| <= pi/4).
// Horner evaluation over x^2, final multiply by x^3 and add of x restores
// the odd series.  Keep the evaluation order exactly as written.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
    return __y * (__x2 * __x) + __x;
  }
230 
// Double-precision variant with terms up to -1/15!.  Evaluation order is
// significant — do not reassociate.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x  = [0, 0.7854 = pi/4]
    // __x² = [0, 0.6169 = pi²/8]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
    return __y * (__x2 * __x) + __x;
  }
248 
249 // }}}
// __zero_low_bits {{{
// Returns __x with the _Bits least significant bits of each element's bit
// pattern cleared (bitwise AND with the _Tp bit-cast of ~0 << _Bits).
// Used to split a value into a high part that is exact in fewer mantissa
// bits plus a remainder (see __fold_input for double).
template <int _Bits, typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  __zero_low_bits(simd<_Tp, _Abi> __x)
  {
    const simd<_Tp, _Abi> __bitmask
      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
    return {__private_init,
            _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
  }
260 
261 // }}}
262 // __fold_input {{{
263 
264 /**@internal
265  * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
266  * quadrant 0: [-¼π, ¼π]
267  * quadrant 1: [ ¼π, ¾π]
268  * quadrant 2: [ ¾π, 1¼π]
269  * quadrant 3: [1¼π, 1¾π]
270  *
271  * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
272  * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
273  * ```
274  * y = trunc(x / ¼π);
275  * y += fmod(y, 2);
276  * ```
277  * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
279  * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
280  * `2/π * x` is better (faster).
281  */
// Result of __fold_input: the argument reduced into [-¼π, ¼π] plus the
// quadrant (0..3) it came from, as a simd<int> of matching size.
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;                          // folded argument
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant; // quadrant index 0..3
  };
288 
// Constants used by the argument folding.  __pi_2_5bits0 has its 5 least
// significant mantissa bits zero (per its defining comment), so the split
// π/2 ≈ __pi_2_5bits0 + __pi_2_5bits0_rem allows x - y*π/2 to be computed
// in two steps with reduced rounding error.
namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0; // π/2
} // namespace __math_double
302 
// Folds |__x| into [-¼π, ¼π] and computes the quadrant.  Three paths by
// input magnitude: (1) already small — quadrant 0; (2) moderately small —
// fold in float using the split π/2 constants; (3) otherwise — recompute
// the reduction in double precision and cast back.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x);  // sine/cosine handle the sign separately
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make the
    // following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
        const _V __y = nearbyint(__r._M_x * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
        // two-step x -= y*π/2 using the split constant for extra precision
        __r._M_x -= __y * __pi_2_5bits0;
        __r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
        // fall back to double-precision reduction for large inputs
        using __math_double::__2_over_pi;
        using __math_double::__pi_2;
        using _VD = rebind_simd_t<double, _V>;
        _VD __xd = static_simd_cast<_VD>(__r._M_x);
        _VD __y = nearbyint(__xd * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
        __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }
346 
// Double-precision argument folding.  Paths by magnitude of y = round(x/(π/2)):
//  - |x| < π/4: quadrant 0, nothing to do
//  - y fits 11 mantissa bits:  3-term split of π/2 (each product near-exact)
//  - y fits 29 mantissa bits:  different 3-term split of π/2
//  - otherwise: split y into high/low parts (__zero_low_bits<26>) and
//    subtract the partial products ordered by magnitude via min/max to
//    limit rounding error.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
        __r._M_quadrant = 0;
        return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4)); // round(x / (π/2))
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
        // x - y * pi/2, y uses no more than 11 mantissa bits
        __r._M_x -= __y * 0x1.921FB54443000p0;
        __r._M_x -= __y * -0x1.73DCB3B39A000p-43;
        __r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
        // x - y * pi/2, y uses no more than 29 mantissa bits
        __r._M_x -= __y * 0x1.921FB40000000p0;
        __r._M_x -= __y * 0x1.4442D00000000p-24;
        __r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
        // x - y * pi/2, y may require all mantissa bits
        const _V __y_hi = __zero_low_bits<26>(__y);
        const _V __y_lo = __y - __y_hi;
        const auto __pi_2_1 = 0x1.921FB50000000p0;
        const auto __pi_2_2 = 0x1.110B460000000p-26;
        const auto __pi_2_3 = 0x1.1A62630000000p-54;
        const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
        __r._M_x = __r._M_x - __y_hi * __pi_2_1
                     - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - max(__y * __pi_2_4, __y_lo * __pi_2_3)
                     - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }
398 
399 // }}}
// __extract_exponent_as_int {{{
// Returns the raw (still biased) exponent field of each element of __v as
// simd<int>: mask with the exponent bits (the bit pattern of +inf) and
// shift the mantissa bits (__digits_v<_Tp> - 1 of them) out.
template <typename _Tp, typename _Abi>
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    using namespace std::experimental::__proposed;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
             simd_bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
               >> (__digits_v<_Tp> - 1));
  }
415 
416 // }}}
// __impl_or_fallback {{{
// Calls __impl_fun(args...) if that call expression is well-formed,
// otherwise __fallback_fun(args...).  The int/float dummy parameter makes
// the impl overload the better match for the int() argument; SFINAE on the
// trailing return type removes it when the impl call does not compile.
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
                              _Args&&... __args)
  -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

template <typename ImplFun, typename FallbackFun, typename... _Args>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
                              _Args&&... __args)
  -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
//}}}
439 
// trigonometric functions {{{
// element-wise simd overloads generated from the scalar <cmath> functions
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
445 
446 /*
447  * algorithm for sine and cosine:
448  *
449  * The result can be calculated with sine or cosine depending on the π/4 section
450  * the input is in. sine ≈ __x + __x³ cosine ≈ 1 - __x²
451  *
452  * sine:
453  * Map -__x to __x and invert the output
454  * Extend precision of __x - n * π/4 by calculating
455  * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
456  *
457  * Calculate Taylor series with tuned coefficients.
458  * Fix sign.
459  */
// cos{{{
// Element-wise cosine.  Scalar/fixed-size ABIs defer to the implementation
// hook; otherwise fold the argument and evaluate the cos or sin polynomial
// per element depending on the quadrant, fixing the sign via bitwise XOR.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
        // large float inputs are recomputed in double precision
        // (393382: presumably the bound up to which the float folding in
        // __fold_input stays accurate enough — TODO confirm)
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
            return static_simd_cast<_V>(
                     cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | cosSeries, +
        //        1 | sinSeries, -
        //        2 | cosSeries, -
        //        3 | sinSeries, +
        using namespace std::experimental::__float_bitwise_operators;
        // (1 + quadrant) << 30 places a set bit in the sign position
        // exactly for quadrants 1 and 2; AND with -0.f isolates it
        const _V __sign_flip
          = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

        const auto __need_cos = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else // some_of(__need_cos): blend both series per element
          {
            _V __r = __sinSeries(__f._M_x);
            where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
            return __r ^ __sign_flip;
          }
      }
  }
498 
// scalar-ABI overload: forwards directly to std::cos (better match than the
// generic template above for simd<_Tp, simd_abi::scalar>)
template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  cos(simd<_Tp, simd_abi::scalar> __x)
  { return std::cos(__data(__x)); }
504 
505 //}}}
// sin{{{
// Element-wise sine.  Mirrors cos() above: fold the argument, pick the sin
// or cos polynomial per quadrant, and fix the sign with a bitwise XOR.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
        // large float inputs are recomputed in double precision
        // (527449: presumably the accuracy bound of the float folding —
        // TODO confirm)
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
            return static_simd_cast<_V>(
                     sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | sinSeries
        //        1 | cosSeries
        //        2 | sinSeries, sign flip
        //        3 | cosSeries, sign flip
        using namespace std::experimental::__float_bitwise_operators;
        // sign = sign(x) XOR sign(1 - quadrant): 1 - q is negative exactly
        // for quadrants 2 and 3; AND with -0. isolates the sign bit
        const auto __sign_flip
          = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

        const auto __need_sin = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else // some_of(__need_sin): blend both series per element
          {
            _V __r = __cosSeries(__f._M_x);
            where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
            return __sign_flip ^ __r;
          }
      }
  }
544 
// scalar-ABI overload: forwards directly to std::sin (better match than the
// generic template above for simd<_Tp, simd_abi::scalar>)
template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  sin(simd<_Tp, simd_abi::scalar> __x)
  { return std::sin(__data(__x)); }
550 
551 //}}}
// remaining trigonometric and hyperbolic overloads, generated from the
// scalar <cmath> functions
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
// }}}
// exponential functions {{{
// generated element-wise overloads
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)

// }}}
566 // frexp {{{
567 #if _GLIBCXX_SIMD_X86INTRIN
// Dispatches to the widest-matching AVX512 getexp intrinsic for the given
// element type and vector width, widening through a 512-bit register when
// only AVX512F (no VL) is available.
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }
595 
// Dispatches to the matching AVX512 getmant intrinsic.  All branches use
// _MM_MANT_NORM_p5_1 (mantissa normalized into [0.5, 1), per the enum) and
// _MM_MANT_SIGN_src (keep the source's sign).
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
                                           _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
                                              _MM_MANT_NORM_p5_1,
                                              _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
629 #endif // _GLIBCXX_SIMD_X86INTRIN
630 
/**
 * splits @p __v into exponent and mantissa, the sign is kept with the mantissa
 *
 * The return value will be in the range [0.5, 1.0[
 * The @p __e value will be an integer defining the power-of-two exponent
 */
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
        // single-element simd: defer to the scalar std::frexp
        int __tmp;
        const auto __r = std::frexp(__x[0], &__tmp);
        (*__exp)[0] = __tmp;
        return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (__have_avx512f)
      {
        // AVX512 path: getexp/getmant do the split in hardware; zero inputs
        // must be special-cased (blend keeps 0 -> mantissa 0, exponent 0)
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        constexpr size_t _NI = _Np < 4 ? 4 : _Np; // intrinsics need >= 128 bits
        const auto __v = __data(__x);
        const auto __isnonzero
          = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
        // getexp yields the exponent of the [1, 2) normalization; frexp's
        // [0.5, 1) convention requires one more
        const _SimdWrapper<int, _NI> __exp_plus1
          = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
        const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
          _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
                                      _SimdWrapper<int, _NI>(), __exp_plus1));
        simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
        return {__private_init,
                _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
                                              __isnonzero),
                                            __v, __getmant_avx512(__v))};
      }
#endif // _GLIBCXX_SIMD_X86INTRIN
    else
      {
        // fallback implementation
        static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
        using _V = simd<_Tp, _Abi>;
        using _IV = rebind_simd_t<int, _V>;
        using namespace std::experimental::__proposed;
        using namespace std::experimental::__float_bitwise_operators;

        // IEEE-754 constants for binary32 / binary64
        constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
        constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
        constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
          = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
          = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

        // overwrite the exponent field so the value lands in +/-[.5, 1)
        _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
        const _IV __exponent_bits = __extract_exponent_as_int(__x);
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
          {
            // fast path: all elements normal, just unbias the exponent
            *__exp
              = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
            return __mant;
          }

#if __FINITE_MATH_ONLY__
        // at least one element of __x is 0 or subnormal, the rest is normal
        // (inf and NaN are excluded by -ffinite-math-only)
        const auto __iszero_inf_nan = __x == 0;
#else
        using _Ip = __int_for_sizeof_t<_Tp>;
        const auto __as_int = simd_bit_cast<rebind_simd_t<_Ip, _V>>(abs(__x));
        const auto __inf = simd_bit_cast<rebind_simd_t<_Ip, _V>>(_V(__infinity_v<_Tp>));
        // integer compare: |x| bit pattern 0 -> zero; >= inf pattern -> inf/NaN
        const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
          __as_int == 0 || __as_int >= __inf);
#endif

        // scale subnormals into the normal range, then redo the extraction
        const _V __scaled_subnormal = __x * __subnorm_scale;
        const _V __mant_subnormal
          = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
        where(!isnormal(__x), __mant) = __mant_subnormal;
        where(__iszero_inf_nan, __mant) = __x; // 0/inf/NaN pass through
        _IV __e = __extract_exponent_as_int(__scaled_subnormal);
        using _MaskType =
          typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
                                 _V, _IV>::mask_type;
        const _MaskType __value_isnormal = isnormal(__x).__cvt();
        where(__value_isnormal.__cvt(), __e) = __exponent_bits;
        static_assert(sizeof(_IV) == sizeof(__value_isnormal));
        // bias correction: __exp_adjust for normals; additionally
        // __exp_offset compensates the __subnorm_scale applied to nonzero
        // subnormals (exponent field 0, value != 0)
        const _IV __offset
          = (simd_bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
              | (simd_bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
                                      & static_simd_cast<_MaskType>(__x != 0))
                   & _IV(__exp_adjust + __exp_offset));
        *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
        return __mant;
      }
  }
729 
730 // }}}
// generated element-wise overloads
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)

// logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)
739 
740 //}}}
741 // logb{{{
742 template <typename _Tp, typename _Abi>
743  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
744  logb(const simd<_Tp, _Abi>& __x)
745  {
746  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
747  if constexpr (_Np == 1)
748  return std::logb(__x[0]);
749  else if constexpr (__is_fixed_size_abi_v<_Abi>)
750  return {__private_init, _Abi::_SimdImpl::_S_logb(__data(__x))};
751 #if _GLIBCXX_SIMD_X86INTRIN // {{{
752  else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
753  return {__private_init,
754  __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
755  else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
756  return {__private_init, _mm_getexp_pd(__data(__x))};
757  else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
758  return {__private_init, _mm256_getexp_ps(__data(__x))};
759  else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
760  return {__private_init, _mm256_getexp_pd(__data(__x))};
761  else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
762  return {__private_init,
763  __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
764  else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
765  return {__private_init,
766  __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
767  else if constexpr (__is_avx512_ps<_Tp, _Np>())
768  return {__private_init, _mm512_getexp_ps(__data(__x))};
769  else if constexpr (__is_avx512_pd<_Tp, _Np>())
770  return {__private_init, _mm512_getexp_pd(__data(__x))};
771 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
772  else
773  {
774  using _V = simd<_Tp, _Abi>;
775  using namespace std::experimental::__proposed;
776  auto __is_normal = isnormal(__x);
777 
778  // work on abs(__x) to reflect the return value on Linux for negative
779  // inputs (domain-error => implementation-defined value is returned)
780  const _V abs_x = abs(__x);
781 
782  // __exponent(__x) returns the exponent value (bias removed) as
783  // simd<_Up> with integral _Up
784  auto&& __exponent = [](const _V& __v) {
785  using namespace std::experimental::__proposed;
786  using _IV = rebind_simd_t<
787  conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
788  return (simd_bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
789  - (__max_exponent_v<_Tp> - 1);
790  };
791  _V __r = static_simd_cast<_V>(__exponent(abs_x));
792  if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
793  // without corner cases (nan, inf, subnormal, zero) we have our
794  // answer:
795  return __r;
796  const auto __is_zero = __x == 0;
797  const auto __is_nan = isnan(__x);
798  const auto __is_inf = isinf(__x);
799  where(__is_zero, __r) = -__infinity_v<_Tp>;
800  where(__is_nan, __r) = __x;
801  where(__is_inf, __r) = __infinity_v<_Tp>;
802  __is_normal |= __is_zero || __is_nan || __is_inf;
803  if (all_of(__is_normal))
804  // at this point everything but subnormals is handled
805  return __r;
806  // subnormals repeat the exponent extraction after multiplication of the
807  // input with __a floating point value that has 112 (0x70) in its exponent
808  // (not too big for sp and large enough for dp)
809  const _V __scaled = abs_x * _Tp(0x1p112);
810  _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
811  where(__is_normal, __scaled_exp) = __r;
812  return __scaled_exp;
813  }
814  }
815 
816 //}}}
// Element-wise modf: stores the integral parts in *__iptr and returns the
// fractional parts, each with the sign of the corresponding input element.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
        // single-element simd: defer to the scalar std::modf
        _Tp __tmp;
        _Tp __r = std::modf(__x[0], &__tmp);
        __iptr[0] = __tmp;
        return __r;
      }
    else
      {
        const auto __integral = trunc(__x);
        *__iptr = __integral;
        auto __r = __x - __integral;
#if !__FINITE_MATH_ONLY__
        // for +/-inf, x - trunc(x) is inf - inf = NaN; the fractional part
        // must be +/-0 instead (copysign below restores the sign)
        where(isinf(__x), __r) = _Tp();
#endif
        return copysign(__r, __x);
      }
  }
839 
// scalbn/scalbln: element-wise __x * 2^exp with int resp. long exponent.
_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

// element-wise cube root
_GLIBCXX_SIMD_MATH_CALL_(cbrt)

// element-wise absolute value for floating-point element types
_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)
847 
// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp; this overload supplies abs for signed integral
// element types. (Unsigned _Tp remains excluded by the constraint.)
template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
854 
// _GLIBCXX_SIMD_CVTING2(_NAME) generates two additional overloads of _NAME
// in which one of the two parameters is a non-deduced context
// (__type_identity_t). This allows implicit conversion of that argument
// (e.g. a broadcastable scalar) to simd<_Tp, _Abi>, which the fully-deduced
// overload would reject. The bodies forward to _NAME with both arguments
// now of type simd<_Tp, _Abi>, selecting the fully-deduced overload rather
// than recursing.
#define _GLIBCXX_SIMD_CVTING2(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
  { \
    return _NAME(__x, __y); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
  { \
    return _NAME(__x, __y); \
  }
869 
// _GLIBCXX_SIMD_CVTING3(_NAME) is the ternary analogue of
// _GLIBCXX_SIMD_CVTING2: it generates the six overloads where one or two of
// the three parameters are non-deduced (__type_identity_t), enabling
// implicit conversion of those arguments to simd<_Tp, _Abi>. Each body
// forwards to the fully-deduced _NAME overload (no recursion).
#define _GLIBCXX_SIMD_CVTING3(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, \
    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  }
918 
// Apply __apply chunk-wise over fixed_size simd arguments:
// _M_apply_per_chunk invokes the lambda once per sub-chunk, the lambda
// rebuilds native simd objects (_V) from the chunk data, calls __apply on
// them, and the per-chunk results are assembled into an _R.
template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
                     const _Tps&... __args)
  {
    return {__private_init,
            __data(__arg0)._M_apply_per_chunk(
              [&](auto __impl, const auto&... __inner) {
                // __impl identifies the chunk's native implementation;
                // its simd_type is the native simd for this chunk.
                using _V = typename decltype(__impl)::simd_type;
                return __data(__apply(_V(__private_init, __inner)...));
              },
              __data(__args)...)};
  }
932 
// __hypot(x, y): element-wise sqrt(x² + y²) without undue overflow or
// underflow in the intermediate sum of squares. The vector paths scale both
// inputs by a power of two close to 1/|hi| (exact — only exponent bits
// change), take sqrt of the scaled sum, and multiply the scale back out.
// _VV is either _V or const _V& (the latter for fixed_size, see hypot below).
template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
        // fixed_size: recurse chunk-wise into the native-simd overloads.
        return __fixed_size_apply<_V>([](auto __a,
                                         auto __b) { return hypot(__a, __b); },
                                      __x, __y);
      }
    else
      {
        // A simple solution for _Tp == float would be to cast to double and
        // simply calculate sqrt(x²+y²) as it can't over-/underflow anymore
        // with dp. It still needs the Annex F fixups though and isn't faster
        // on Skylake-AVX512 (not even for SSE and AVX vectors, and really
        // bad for AVX-512).
        using namespace __float_bitwise_operators;
        using namespace __proposed;
        _V __absx = abs(__x);          // no error
        _V __absy = abs(__y);          // no error
        _V __hi = max(__absx, __absy); // no error
        _V __lo = min(__absy, __absx); // no error

        // round __hi down to the next power-of-2:
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
        if constexpr (__have_neon && !__have_neon_a32)
          { // With ARMv7 NEON, we have no subnormals and must use slightly
            // different strategy
            const _V __hi_exp = __hi & __inf;
            _V __scale_back = __hi_exp;
            // For large exponents (max & max/2) the inversion comes too close
            // to subnormals. Subtract 3 from the exponent:
            where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
            // Invert and adjust for the off-by-one error of inversion via xor:
            const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
            const _V __h1 = __hi * __scale;
            const _V __l1 = __lo * __scale;
            _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
            // Fix up hypot(0, 0) to not be NaN:
            where(__hi == 0, __r) = 0;
            return __r;
          }
#endif

#ifdef __FAST_MATH__
        // With fast-math, ignore precision of subnormals and inputs from
        // __finite_max_v/2 to __finite_max_v. This removes all
        // branching/masking.
        if constexpr (true)
#else
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                      && all_of(isnormal(__y))))
#endif
          {
            // fast path: all inputs normal (or fast-math assumed so)
            const _V __hi_exp = __hi & __inf;
            //((__hi + __hi) & __inf) ^ __inf almost works for computing
            //__scale,
            // except when (__hi + __hi) & __inf == __inf, in which case
            // __scale becomes 0 (should be min/2 instead) and thus loses the
            // information from __lo.
#ifdef __FAST_MATH__
            using _Ip = __int_for_sizeof_t<_Tp>;
            using _IV = rebind_simd_t<_Ip, _V>;
            const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
            const _V __scale
              = simd_bit_cast<_V>(2 * simd_bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
            const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
              = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
            // __h1 = mantissa of __hi with an implicit exponent of 0
            // (i.e. __hi scaled to [1, 2))
            const _V __h1 = (__hi & __mant_mask) | _V(1);
            const _V __l1 = __lo * __scale;
            return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
          }
        else
          {
            // slower path to support subnormals
            // if __hi is subnormal, avoid scaling by inf & final mul by 0
            // (which yields NaN) by using min()
            _V __scale = _V(1 / __norm_min_v<_Tp>);
            // invert exponent w/o error and w/o using the slow divider unit:
            // xor inverts the exponent but off by 1. Multiplication with .5
            // adjusts for the discrepancy.
            where(__hi >= __norm_min_v<_Tp>, __scale)
              = ((__hi & __inf) ^ __inf) * _Tp(.5);
            // adjust final exponent for subnormal inputs
            _V __hi_exp = __norm_min_v<_Tp>;
            where(__hi >= __norm_min_v<_Tp>, __hi_exp)
              = __hi & __inf;          // no error
            _V __h1 = __hi * __scale;  // no error
            _V __l1 = __lo * __scale;  // no error

            // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
            // this ensures no overflow in the argument to sqrt
            _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            // the naive fixup goes like this:
            //
            // where(__l1 == 0, __r) = __hi;
            // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
            // where(isinf(__absx) || isinf(__absy), __r) = __inf;
            //
            // The fixup can be prepared in parallel with the sqrt, requiring
            // a single blend step after hi_exp * sqrt, reducing latency and
            // throughput:
            _V __fixup = __hi; // __lo == 0
            where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
            where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
            where(!(__lo == 0 || isunordered(__x, __y)
                      || (isinf(__absx) || isinf(__absy))),
                  __fixup)
              = __r;
            __r = __fixup;
#endif
            return __r;
          }
      }
  }
1060 
1061 template <typename _Tp, typename _Abi>
1062  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1063  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1064  {
1065  return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1066  const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1067  __y);
1068  }
1069 
1070 _GLIBCXX_SIMD_CVTING2(hypot)
1071 
// __hypot(x, y, z): element-wise sqrt(x² + y² + z²) without undue
// overflow/underflow, following the same scale-by-power-of-two strategy as
// the binary __hypot above. __hi is the largest magnitude; __l0 and __l1
// are the two smaller magnitudes, summed first for better accuracy.
  template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y, _VV __z)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Abi = typename _V::abi_type;
    using _Tp = typename _V::value_type;
    /* FIXME: enable after PR77776 is resolved
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
    else
    */
    if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
      {
        // fixed_size: recurse chunk-wise into the native-simd overloads.
        return __fixed_size_apply<simd<_Tp, _Abi>>(
          [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
          __x, __y, __z);
      }
    else
      {
        using namespace __float_bitwise_operators;
        using namespace __proposed;
        const _V __absx = abs(__x);                 // no error
        const _V __absy = abs(__y);                 // no error
        const _V __absz = abs(__z);                 // no error
        _V __hi = max(max(__absx, __absy), __absz); // no error
        _V __l0 = min(__absz, max(__absx, __absy)); // no error
        _V __l1 = min(__absy, __absx);              // no error
        if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
                        && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
          { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal
            // or NaN. In this case the bit-tricks don't work, they require
            // IEC559 binary32 or binary64 format.
#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
              return __infinity_v<_Tp>;
            else if (isunordered(__absx[0], __absy[0] + __absz[0]))
              return __quiet_NaN_v<_Tp>;
            else if (__l0[0] == 0 && __l1[0] == 0)
              return __hi;
#endif
            // Overwrite significand of __hi with 0x8000...: keeps only the
            // exponent, yielding a power of two we can divide by exactly.
            _V __hi_exp = __hi;
            const _ULLong __tmp = 0x8000'0000'0000'0000ull;
            __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
            const _V __scale = 1 / __hi_exp;
            __hi *= __scale;
            __l0 *= __scale;
            __l1 *= __scale;
            return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
          }
        else
          {
            // round __hi down to the next power-of-2:
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
            if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
              { // With ARMv7 NEON, we have no subnormals and must use
                // slightly different strategy
                const _V __hi_exp = __hi & __inf;
                _V __scale_back = __hi_exp;
                // For large exponents (max & max/2) the inversion comes too
                // close to subnormals. Subtract 3 from the exponent:
                where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
                // Invert and adjust for the off-by-one error of inversion via
                // xor:
                const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
                const _V __h1 = __hi * __scale;
                __l0 *= __scale;
                __l1 *= __scale;
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                // empty asm with "+m": optimization barrier, keeps the sum
                // of the two small squares from being re-associated into the
                // expression below
                asm("" : "+m"(__lo));
                _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
                // Fix up hypot(0, 0, 0) to not be NaN:
                where(__hi == 0, __r) = 0;
                return __r;
              }
#endif

#ifdef __FAST_MATH__
            // With fast-math, ignore precision of subnormals and inputs from
            // __finite_max_v/2 to __finite_max_v. This removes all
            // branching/masking.
            if constexpr (true)
#else
            if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                          && all_of(isnormal(__y))
                                          && all_of(isnormal(__z))))
#endif
              {
                // fast path: all inputs normal (or fast-math assumed so)
                const _V __hi_exp = __hi & __inf;
                //((__hi + __hi) & __inf) ^ __inf almost works for computing
                //__scale, except when (__hi + __hi) & __inf == __inf, in
                // which case __scale
                // becomes 0 (should be min/2 instead) and thus loses the
                // information from __lo.
#ifdef __FAST_MATH__
                using _Ip = __int_for_sizeof_t<_Tp>;
                using _IV = rebind_simd_t<_Ip, _V>;
                const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
                const _V __scale
                  = simd_bit_cast<_V>(2 * simd_bit_cast<_Ip>(_Tp(1))
                                        - __as_int);
#else
                const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
                constexpr _Tp __mant_mask
                  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
                // __h1 = mantissa of __hi with an implicit exponent of 0
                const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
                __l0 *= __scale;
                __l1 *= __scale;
                const _V __lo
                  = __l0 * __l0
                      + __l1 * __l1; // add the two smaller values first
                return __hi_exp * sqrt(__lo + __h1 * __h1);
              }
            else
              {
                // slower path to support subnormals
                // if __hi is subnormal, avoid scaling by inf & final mul by 0
                // (which yields NaN) by using min()
                _V __scale = _V(1 / __norm_min_v<_Tp>);
                // invert exponent w/o error and w/o using the slow divider
                // unit: xor inverts the exponent but off by 1. Multiplication
                // with .5 adjusts for the discrepancy.
                where(__hi >= __norm_min_v<_Tp>, __scale)
                  = ((__hi & __inf) ^ __inf) * _Tp(.5);
                // adjust final exponent for subnormal inputs
                _V __hi_exp = __norm_min_v<_Tp>;
                where(__hi >= __norm_min_v<_Tp>, __hi_exp)
                  = __hi & __inf;          // no error
                _V __h1 = __hi * __scale;  // no error
                __l0 *= __scale;           // no error
                __l1 *= __scale;           // no error
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
                // fixup for Annex F requirements
                _V __fixup = __hi; // __lo == 0
                // where(__lo == 0, __fixup) = __hi;
                where(isunordered(__x, __y + __z), __fixup)
                  = __quiet_NaN_v<_Tp>;
                where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
                  = __inf;
                // Instead of __lo == 0, the following could depend on __h1²
                // == __h1² + __lo (i.e. __hi is so much larger than the other
                // two inputs that the result is exactly __hi). While this may
                // improve precision, it is likely to reduce efficiency if the
                // ISA has FMAs (because __h1² + __lo is an FMA, but the
                // intermediate
                // __h1² must be kept)
                where(!(__lo == 0 || isunordered(__x, __y + __z)
                          || isinf(__absx) || isinf(__absy) || isinf(__absz)),
                      __fixup)
                  = __r;
                __r = __fixup;
#endif
                return __r;
              }
          }
      }
  }
1236 
1237  template <typename _Tp, typename _Abi>
1238  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1239  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
1240  const simd<_Tp, _Abi>& __z)
1241  {
1242  return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1243  const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1244  __y,
1245  __z);
1246  }
1247 
// converting overloads (scalar/broadcast arguments) for the ternary hypot
_GLIBCXX_SIMD_CVTING3(hypot)

// element-wise power function
_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

// element-wise roots, error functions, gamma functions
_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)

// element-wise rounding to integral values
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

// element-wise remainder operations
_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
1273 
1274 template <typename _Tp, typename _Abi>
1275  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1276  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1277  {
1278  if constexpr (simd_size_v<_Tp, _Abi> == 1)
1279  return std::copysign(__x[0], __y[0]);
1280  else if constexpr (__is_fixed_size_abi_v<_Abi>)
1281  return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))};
1282  else
1283  {
1284  using _V = simd<_Tp, _Abi>;
1285  using namespace std::experimental::__float_bitwise_operators;
1286  _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
1287  return (__x & ~__signmask) | (__y & __signmask);
1288  }
1289  }
1290 
// element-wise next representable value toward the second argument
_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
// element-wise positive difference, maximum, minimum
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

// element-wise fused multiply-add and classification
_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)
1300 _GLIBCXX_SIMD_MATH_CALL_(isfinite)
1301 
// isnan and isinf require special treatment because old glibc may declare
// `int isinf(double)`, so these are written out instead of using the
// _GLIBCXX_SIMD_MATH_CALL_ macro. Returns a simd_mask that is true where
// the element is (positive or negative) infinity.
template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }
1309 
// See isinf above for why this is not generated via the macro. Returns a
// simd_mask that is true where the element is NaN.
template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }

_GLIBCXX_SIMD_MATH_CALL_(isnormal)
1317 
1318 template <typename..., typename _Tp, typename _Abi>
1319  simd_mask<_Tp, _Abi>
1320  signbit(simd<_Tp, _Abi> __x)
1321  {
1322  if constexpr (is_integral_v<_Tp>)
1323  {
1324  if constexpr (is_unsigned_v<_Tp>)
1325  return simd_mask<_Tp, _Abi>{}; // false
1326  else
1327  return __x < 0;
1328  }
1329  else
1330  return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
1331  }
1332 
// element-wise quiet (non-signaling) comparison functions
_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)
1339 
1340 /* not covered in [parallel.simd.math]
1341 template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
1342 template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
1343 template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);
1344 
1345 template <typename _V> struct simd_div_t {
1346  _V quot, rem;
1347 };
1348 
1349 template <typename _Abi>
1350 simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer,
1351  _SCharv<_Abi> denom);
1352 template <typename _Abi>
1353 simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer,
1354  __shortv<_Abi> denom);
1355 template <typename _Abi>
1356 simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
1357 template <typename _Abi>
1358 simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer,
1359  __longv<_Abi> denom);
1360 template <typename _Abi>
1361 simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
1362  __llongv<_Abi> denom);
1363 */
1364 
1365 // special math {{{
1366 template <typename _Tp, typename _Abi>
1367  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1368  assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1369  const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1370  const simd<_Tp, _Abi>& __x)
1371  {
1372  return simd<_Tp, _Abi>([&](auto __i) {
1373  return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
1374  });
1375  }
1376 
1377 template <typename _Tp, typename _Abi>
1378  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1379  assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1380  const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1381  const simd<_Tp, _Abi>& __x)
1382  {
1383  return simd<_Tp, _Abi>([&](auto __i) {
1384  return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
1385  });
1386  }
1387 
// element-wise special math functions generated from their scalar
// counterparts (C++17 mathematical special functions)
_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)
1400 
1401 template <typename _Tp, typename _Abi>
1402  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1403  hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1404  const simd<_Tp, _Abi>& __x)
1405  {
1406  return simd<_Tp, _Abi>(
1407  [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
1408  }
1409 
1410 template <typename _Tp, typename _Abi>
1411  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1412  laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1413  const simd<_Tp, _Abi>& __x)
1414  {
1415  return simd<_Tp, _Abi>(
1416  [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
1417  }
1418 
1419 template <typename _Tp, typename _Abi>
1420  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1421  legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1422  const simd<_Tp, _Abi>& __x)
1423  {
1424  return simd<_Tp, _Abi>(
1425  [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
1426  }
1427 
1428 _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
1429 
1430 template <typename _Tp, typename _Abi>
1431  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1432  sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1433  const simd<_Tp, _Abi>& __x)
1434  {
1435  return simd<_Tp, _Abi>(
1436  [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
1437  }
1438 
1439 template <typename _Tp, typename _Abi>
1440  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1441  sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
1442  const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1443  const simd<_Tp, _Abi>& theta)
1444  {
1445  return simd<_Tp, _Abi>([&](auto __i) {
1446  return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
1447  });
1448  }
1449 
1450 template <typename _Tp, typename _Abi>
1451  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1452  sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1453  const simd<_Tp, _Abi>& __x)
1454  {
1455  return simd<_Tp, _Abi>(
1456  [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
1457  }
1458 // }}}
1459 
1460 #undef _GLIBCXX_SIMD_CVTING2
1461 #undef _GLIBCXX_SIMD_CVTING3
1462 #undef _GLIBCXX_SIMD_MATH_CALL_
1463 #undef _GLIBCXX_SIMD_MATH_CALL2_
1464 #undef _GLIBCXX_SIMD_MATH_CALL3_
1465 
1466 _GLIBCXX_SIMD_END_NAMESPACE
1467 
1468 #endif // __cplusplus >= 201703L
1469 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
1470 
1471 // vim: foldmethod=marker sw=2 ts=8 noet sts=2
complex< _Tp > log10(const complex< _Tp > &)
Return complex base 10 logarithm of z.
Definition: complex:829
complex< _Tp > sin(const complex< _Tp > &)
Return complex sine of z.
Definition: complex:859
complex< _Tp > log(const complex< _Tp > &)
Return complex natural logarithm of z.
Definition: complex:824
complex< _Tp > tan(const complex< _Tp > &)
Return complex tangent of z.
Definition: complex:960
_Tp abs(const complex< _Tp > &)
Return magnitude of z.
Definition: complex:630
complex< _Tp > exp(const complex< _Tp > &)
Return complex base e exponential of z.
Definition: complex:797
complex< _Tp > cosh(const complex< _Tp > &)
Return complex hyperbolic cosine of z.
Definition: complex:771
complex< _Tp > tanh(const complex< _Tp > &)
Return complex hyperbolic tangent of z.
Definition: complex:988
complex< _Tp > pow(const complex< _Tp > &, int)
Return x to the y'th power.
Definition: complex:1019
complex< _Tp > sinh(const complex< _Tp > &)
Return complex hyperbolic sine of z.
Definition: complex:889
complex< _Tp > cos(const complex< _Tp > &)
Return complex cosine of z.
Definition: complex:741
complex< _Tp > sqrt(const complex< _Tp > &)
Return complex square root of z.
Definition: complex:933
typename make_unsigned< _Tp >::type make_unsigned_t
Alias template for make_unsigned.
Definition: type_traits:2009
typename conditional< _Cond, _Iftrue, _Iffalse >::type conditional_t
Alias template for conditional.
Definition: type_traits:2618
auto declval() noexcept -> decltype(__declval< _Tp >(0))
Definition: type_traits:2393
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:254
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:230
__gnu_cxx::__promote_2< _Tpnu, _Tp >::__type cyl_bessel_i(_Tpnu __nu, _Tp __x)
Definition: specfun.h:535
__gnu_cxx::__promote< _Tp >::__type sph_neumann(unsigned int __n, _Tp __x)
Definition: specfun.h:1193
__gnu_cxx::__promote_3< _Tp, _Tpn, _Tpp >::__type ellint_3(_Tp __k, _Tpn __nu, _Tpp __phi)
Return the incomplete elliptic integral of the third kind .
Definition: specfun.h:830
__gnu_cxx::__promote< _Tp >::__type comp_ellint_2(_Tp __k)
Definition: specfun.h:438
__gnu_cxx::__promote< _Tp >::__type assoc_legendre(unsigned int __l, unsigned int __m, _Tp __x)
Definition: specfun.h:298
__gnu_cxx::__promote< _Tp >::__type assoc_laguerre(unsigned int __n, unsigned int __m, _Tp __x)
Definition: specfun.h:252
__gnu_cxx::__promote< _Tp >::__type sph_bessel(unsigned int __n, _Tp __x)
Definition: specfun.h:1102
__gnu_cxx::__promote_2< _Tpnu, _Tp >::__type cyl_bessel_j(_Tpnu __nu, _Tp __x)
Definition: specfun.h:581
__gnu_cxx::__promote< _Tp >::__type sph_legendre(unsigned int __l, unsigned int __m, _Tp __theta)
Definition: specfun.h:1149
__gnu_cxx::__promote_2< _Tpnu, _Tp >::__type cyl_neumann(_Tpnu __nu, _Tp __x)
Definition: specfun.h:681
__gnu_cxx::__promote< _Tp >::__type riemann_zeta(_Tp __s)
Definition: specfun.h:1058
__gnu_cxx::__promote_2< _Tpa, _Tpb >::__type beta(_Tpa __a, _Tpb __b)
Definition: specfun.h:343
__gnu_cxx::__promote_2< _Tpnu, _Tp >::__type cyl_bessel_k(_Tpnu __nu, _Tp __x)
Definition: specfun.h:633
__gnu_cxx::__promote< _Tp >::__type expint(_Tp __x)
Definition: specfun.h:870
__gnu_cxx::__promote< _Tp >::__type hermite(unsigned int __n, _Tp __x)
Definition: specfun.h:918
__gnu_cxx::__promote< _Tp >::__type comp_ellint_1(_Tp __k)
Definition: specfun.h:391
__gnu_cxx::__promote< _Tp >::__type laguerre(unsigned int __n, _Tp __x)
Definition: specfun.h:962
__gnu_cxx::__promote_2< _Tp, _Tpp >::__type ellint_2(_Tp __k, _Tpp __phi)
Definition: specfun.h:777
__gnu_cxx::__promote_2< _Tp, _Tpn >::__type comp_ellint_3(_Tp __k, _Tpn __nu)
Definition: specfun.h:489
__gnu_cxx::__promote_2< _Tp, _Tpp >::__type ellint_1(_Tp __k, _Tpp __phi)
Definition: specfun.h:729
__gnu_cxx::__promote< _Tp >::__type legendre(unsigned int __l, _Tp __x)
Definition: specfun.h:1007
ISO C++ entities toplevel namespace is std.
_Tp fabs(const std::complex< _Tp > &)
fabs(__z) [8.1.8].
Definition: complex:1817
std::complex< _Tp > asinh(const std::complex< _Tp > &)
asinh(__z) [8.1.6].
Definition: complex:1764
std::complex< _Tp > atan(const std::complex< _Tp > &)
atan(__z) [8.1.4].
Definition: complex:1689
std::complex< _Tp > atanh(const std::complex< _Tp > &)
atanh(__z) [8.1.7].
Definition: complex:1808
std::complex< _Tp > acosh(const std::complex< _Tp > &)
acosh(__z) [8.1.5].
Definition: complex:1725
std::complex< _Tp > acos(const std::complex< _Tp > &)
acos(__z) [8.1.2].
Definition: complex:1609
std::complex< _Tp > asin(const std::complex< _Tp > &)
asin(__z) [8.1.3].
Definition: complex:1645