Skip to content

Commit a66f719

Browse files
committed
[libc][math] Refactor cos implementation to header-only in src/__support/math folder.
1 parent ae312dc commit a66f719

File tree

16 files changed

+377
-278
lines changed

16 files changed

+377
-278
lines changed

libc/shared/math.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "math/atanhf16.h"
3333
#include "math/cbrt.h"
3434
#include "math/cbrtf.h"
35+
#include "math/cos.h"
3536
#include "math/erff.h"
3637
#include "math/exp.h"
3738
#include "math/exp10.h"

libc/shared/math/cos.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===-- Shared cos function -------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SHARED_MATH_COS_H
10+
#define LLVM_LIBC_SHARED_MATH_COS_H
11+
12+
#include "shared/libc_common.h"
13+
#include "src/__support/math/cos.h"
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
namespace shared {
17+
18+
using math::cos;
19+
20+
} // namespace shared
21+
} // namespace LIBC_NAMESPACE_DECL
22+
23+
#endif // LLVM_LIBC_SHARED_MATH_COS_H

libc/src/__support/math/CMakeLists.txt

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,24 @@ add_header_library(
357357
libc.src.__support.macros.optimization
358358
)
359359

360+
add_header_library(
361+
cos
362+
HDRS
363+
cos.h
364+
DEPENDS
365+
libc.src.__support.math.sincos_eval
366+
libc.hdr.errno_macros
367+
libc.src.errno.errno
368+
libc.src.__support.FPUtil.double_double
369+
libc.src.__support.FPUtil.dyadic_float
370+
libc.src.__support.FPUtil.except_value_utils
371+
libc.src.__support.FPUtil.fenv_impl
372+
libc.src.__support.FPUtil.fp_bits
373+
libc.src.__support.math.range_reduction_double
374+
libc.src.__support.macros.optimization
375+
)
376+
377+
360378
add_header_library(
361379
erff
362380
HDRS
@@ -613,3 +631,32 @@ add_header_library(
613631
libc.src.__support.macros.optimization
614632
libc.src.__support.macros.properties.cpu_features
615633
)
634+
635+
add_header_library(
636+
range_reduction_double
637+
HDRS
638+
range_reduction_double_common.h
639+
range_reduction_double_fma.h
640+
range_reduction_double_nofma.h
641+
DEPENDS
642+
libc.src.__support.FPUtil.double_double
643+
libc.src.__support.FPUtil.dyadic_float
644+
libc.src.__support.FPUtil.fp_bits
645+
libc.src.__support.FPUtil.fma
646+
libc.src.__support.FPUtil.multiply_add
647+
libc.src.__support.FPUtil.nearest_integer
648+
libc.src.__support.common
649+
libc.src.__support.integer_literals
650+
)
651+
652+
add_header_library(
653+
sincos_eval
654+
HDRS
655+
sincos_eval.h
656+
DEPENDS
657+
libc.src.__support.FPUtil.double_double
658+
libc.src.__support.FPUtil.dyadic_float
659+
libc.src.__support.FPUtil.multiply_add
660+
libc.src.__support.FPUtil.polyeval
661+
libc.src.__support.integer_literals
662+
)

libc/src/__support/math/cos.h

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
//===-- Implementation header for cos ---------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LIBC_SRC___SUPPORT_MATH_COS_H
10+
#define LIBC_SRC___SUPPORT_MATH_COS_H
11+
12+
#include "range_reduction_double_common.h"
13+
#include "sincos_eval.h"
14+
#include "src/__support/FPUtil/FEnvImpl.h"
15+
#include "src/__support/FPUtil/FPBits.h"
16+
#include "src/__support/FPUtil/double_double.h"
17+
#include "src/__support/FPUtil/dyadic_float.h"
18+
#include "src/__support/FPUtil/except_value_utils.h"
19+
#include "src/__support/macros/config.h"
20+
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
21+
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
22+
23+
#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
24+
#include "range_reduction_double_fma.h"
25+
#else
26+
#include "range_reduction_double_nofma.h"
27+
#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
28+
29+
namespace LIBC_NAMESPACE_DECL {
30+
31+
namespace math {
32+
33+
LIBC_INLINE static constexpr double cos(double x) {
34+
using namespace range_reduction_double_internal;
35+
using DoubleDouble = fputil::DoubleDouble;
36+
using FPBits = typename fputil::FPBits<double>;
37+
FPBits xbits(x);
38+
39+
uint16_t x_e = xbits.get_biased_exponent();
40+
41+
DoubleDouble y;
42+
unsigned k = 0;
43+
LargeRangeReduction range_reduction_large;
44+
45+
// |x| < 2^16.
46+
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) {
47+
// |x| < 2^-7
48+
if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) {
49+
// |x| < 2^-27
50+
if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) {
51+
// Signed zeros.
52+
if (LIBC_UNLIKELY(x == 0.0))
53+
return 1.0;
54+
55+
// For |x| < 2^-27, |cos(x) - 1| < |x|^2/2 < 2^-54 = ulp(1 - 2^-53)/2.
56+
return fputil::round_result_slightly_down(1.0);
57+
}
58+
// No range reduction needed.
59+
k = 0;
60+
y.lo = 0.0;
61+
y.hi = x;
62+
} else {
63+
// Small range reduction.
64+
k = range_reduction_small(x, y);
65+
}
66+
} else {
67+
// Inf or NaN
68+
if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) {
69+
if (xbits.is_signaling_nan()) {
70+
fputil::raise_except_if_required(FE_INVALID);
71+
return FPBits::quiet_nan().get_val();
72+
}
73+
// cos(+-Inf) = NaN
74+
if (xbits.get_mantissa() == 0) {
75+
fputil::set_errno_if_required(EDOM);
76+
fputil::raise_except_if_required(FE_INVALID);
77+
}
78+
return x + FPBits::quiet_nan().get_val();
79+
}
80+
81+
// Large range reduction.
82+
k = range_reduction_large.fast(x, y);
83+
}
84+
85+
DoubleDouble sin_y, cos_y;
86+
87+
[[maybe_unused]] double err =
88+
math::sincos_eval_internal::sincos_eval(y, sin_y, cos_y);
89+
90+
// Look up sin(k * pi/128) and cos(k * pi/128)
91+
#ifdef LIBC_MATH_HAS_SMALL_TABLES
92+
// Memory saving versions. Use 65-entry table.
93+
auto get_idx_dd = [](unsigned kk) -> DoubleDouble {
94+
unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
95+
DoubleDouble ans = SIN_K_PI_OVER_128[idx];
96+
if (kk & 128) {
97+
ans.hi = -ans.hi;
98+
ans.lo = -ans.lo;
99+
}
100+
return ans;
101+
};
102+
DoubleDouble msin_k = get_idx_dd(k + 128);
103+
DoubleDouble cos_k = get_idx_dd(k + 64);
104+
#else
105+
// Fast look up version, but needs 256-entry table.
106+
// -sin(k * pi/128) = sin((k + 128) * pi/128)
107+
// cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
108+
DoubleDouble msin_k = SIN_K_PI_OVER_128[(k + 128) & 255];
109+
DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255];
110+
#endif // LIBC_MATH_HAS_SMALL_TABLES
111+
112+
// After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128).
113+
// So k is an integer and -pi / 256 <= y <= pi / 256.
114+
// Then cos(x) = cos((k * pi/128 + y)
115+
// = cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128)
116+
DoubleDouble cos_k_cos_y = fputil::quick_mult(cos_y, cos_k);
117+
DoubleDouble msin_k_sin_y = fputil::quick_mult(sin_y, msin_k);
118+
119+
DoubleDouble rr = fputil::exact_add<false>(cos_k_cos_y.hi, msin_k_sin_y.hi);
120+
rr.lo += msin_k_sin_y.lo + cos_k_cos_y.lo;
121+
122+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
123+
return rr.hi + rr.lo;
124+
#else
125+
using Float128 = typename fputil::DyadicFloat<128>;
126+
double rlp = rr.lo + err;
127+
double rlm = rr.lo - err;
128+
129+
double r_upper = rr.hi + rlp; // (rr.lo + ERR);
130+
double r_lower = rr.hi + rlm; // (rr.lo - ERR);
131+
132+
// Ziv's rounding test.
133+
if (LIBC_LIKELY(r_upper == r_lower))
134+
return r_upper;
135+
136+
Float128 u_f128, sin_u, cos_u;
137+
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
138+
u_f128 = range_reduction_small_f128(x);
139+
else
140+
u_f128 = range_reduction_large.accurate();
141+
142+
math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u);
143+
144+
auto get_sin_k = [](unsigned kk) -> Float128 {
145+
unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
146+
Float128 ans = SIN_K_PI_OVER_128_F128[idx];
147+
if (kk & 128)
148+
ans.sign = Sign::NEG;
149+
return ans;
150+
};
151+
152+
// -sin(k * pi/128) = sin((k + 128) * pi/128)
153+
// cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
154+
Float128 msin_k_f128 = get_sin_k(k + 128);
155+
Float128 cos_k_f128 = get_sin_k(k + 64);
156+
157+
// cos(x) = cos((k * pi/128 + u)
158+
// = cos(u) * cos(k*pi/128) - sin(u) * sin(k*pi/128)
159+
Float128 r = fputil::quick_add(fputil::quick_mul(cos_k_f128, cos_u),
160+
fputil::quick_mul(msin_k_f128, sin_u));
161+
162+
// TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
163+
// https://github.com/llvm/llvm-project/issues/96452.
164+
165+
return static_cast<double>(r);
166+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
167+
}
168+
169+
} // namespace math
170+
171+
} // namespace LIBC_NAMESPACE_DECL
172+
173+
#endif // LIBC_SRC___SUPPORT_MATH_COS_H

libc/src/math/generic/range_reduction_double_common.h renamed to libc/src/__support/math/range_reduction_double_common.h

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H
10-
#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H
9+
#ifndef LIBC_SRC___SUPPORT_MATH_RANGE_REDUCTION_DOUBLE_COMMON_H
10+
#define LIBC_SRC___SUPPORT_MATH_RANGE_REDUCTION_DOUBLE_COMMON_H
1111

1212
#include "src/__support/FPUtil/double_double.h"
1313
#include "src/__support/FPUtil/dyadic_float.h"
@@ -20,6 +20,10 @@
2020

2121
namespace LIBC_NAMESPACE_DECL {
2222

23+
namespace math {
24+
25+
namespace range_reduction_double_internal {
26+
2327
#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
2428
static constexpr unsigned SPLIT = fputil::DefaultSplit<double>::VALUE;
2529
#else
@@ -40,7 +44,7 @@ using Float128 = LIBC_NAMESPACE::fputil::DyadicFloat<128>;
4044
// Error bound:
4145
// |(x - k * pi/128) - (u_hi + u_lo)| <= max(ulp(ulp(u_hi)), 2^-119)
4246
// <= 2^-111.
43-
LIBC_INLINE unsigned range_reduction_small(double x, DoubleDouble &u) {
47+
LIBC_INLINE static unsigned range_reduction_small(double x, DoubleDouble &u) {
4448
// Values of -pi/128 used for inputs with absolute value <= 2^16.
4549
// The first 3 parts are generated with (53 - 21 = 32)-bit precision, so that
4650
// the product k * MPI_OVER_128[i] is exact.
@@ -267,13 +271,15 @@ struct LargeRangeReduction {
267271
}
268272
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
269273

274+
LIBC_INLINE LargeRangeReduction() = default;
275+
270276
private:
271277
// Index of x in the look-up table ONE_TWENTY_EIGHT_OVER_PI.
272-
unsigned idx;
278+
unsigned idx = 0;
273279
// x scaled down by 2^(-16 *(idx - 3))).
274-
double x_reduced;
280+
double x_reduced = 0;
275281
// Parts of (x * 128/pi) mod 1.
276-
double y_hi, y_lo;
282+
double y_hi = 0, y_lo = 0;
277283
DoubleDouble y_mid;
278284
};
279285

@@ -369,6 +375,10 @@ static constexpr Float128 SIN_K_PI_OVER_128_F128[65] = {
369375
};
370376
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
371377

378+
} // namespace range_reduction_double_internal
379+
380+
} // namespace math
381+
372382
} // namespace LIBC_NAMESPACE_DECL
373383

374-
#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H
384+
#endif // LIBC_SRC___SUPPORT_MATH_RANGE_REDUCTION_DOUBLE_COMMON_H

libc/src/math/generic/range_reduction_double_fma.h renamed to libc/src/__support/math/range_reduction_double_fma.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,22 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_FMA_H
10-
#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_FMA_H
9+
#ifndef LIBC_SRC___SUPPORT_MATH_RANGE_REDUCTION_DOUBLE_FMA_H
10+
#define LIBC_SRC___SUPPORT_MATH_RANGE_REDUCTION_DOUBLE_FMA_H
1111

1212
#include "src/__support/FPUtil/FPBits.h"
1313
#include "src/__support/FPUtil/double_double.h"
1414
#include "src/__support/FPUtil/multiply_add.h"
1515
#include "src/__support/FPUtil/nearest_integer.h"
16-
#include "src/__support/common.h"
1716
#include "src/__support/macros/config.h"
18-
#include "src/__support/macros/optimization.h"
19-
#include "src/math/generic/range_reduction_double_common.h"
17+
#include "src/__support/math/range_reduction_double_common.h"
2018

2119
namespace LIBC_NAMESPACE_DECL {
2220

21+
namespace math {
22+
23+
namespace range_reduction_double_internal {
24+
2325
using LIBC_NAMESPACE::fputil::DoubleDouble;
2426

2527
LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) {
@@ -341,6 +343,10 @@ LIBC_INLINE constexpr DoubleDouble SIN_K_PI_OVER_128[] = {
341343
#endif // !LIBC_MATH_HAS_SMALL_TABLES
342344
};
343345

346+
} // namespace range_reduction_double_internal
347+
348+
} // namespace math
349+
344350
} // namespace LIBC_NAMESPACE_DECL
345351

346-
#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_FMA_H
352+
#endif // LIBC_SRC___SUPPORT_MATH_RANGE_REDUCTION_DOUBLE_FMA_H

0 commit comments

Comments
 (0)