Skip to content

Commit 3265e87

Browse files
committed
[libc][math] Refactor atanhf16 implementation to header-only in src/__support/math folder.
1 parent 281df9e commit 3265e87

File tree

12 files changed

+305
-224
lines changed

12 files changed

+305
-224
lines changed

libc/shared/math.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "math/atanf.h"
3030
#include "math/atanf16.h"
3131
#include "math/atanhf.h"
32+
#include "math/atanhf16.h"
3233
#include "math/erff.h"
3334
#include "math/exp.h"
3435
#include "math/exp10.h"

libc/shared/math/atanhf16.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===-- Shared atanhf16 function --------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SHARED_MATH_ATANHF16_H
10+
#define LLVM_LIBC_SHARED_MATH_ATANHF16_H
11+
12+
#include "shared/libc_common.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "src/__support/math/atanhf16.h"
17+
18+
namespace LIBC_NAMESPACE_DECL {
19+
namespace shared {
20+
21+
using math::atanhf16;
22+
23+
} // namespace shared
24+
} // namespace LIBC_NAMESPACE_DECL
25+
26+
#endif // LIBC_TYPES_HAS_FLOAT16
27+
28+
#endif // LLVM_LIBC_SHARED_MATH_ATANHF16_H

libc/src/__support/math/CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,21 @@ add_header_library(
286286
libc.src.__support.macros.optimization
287287
)
288288

289+
add_header_library(
290+
atanhf16
291+
HDRS
292+
atanhf16.h
293+
DEPENDS
294+
libc.src.__support.FPUtil.fenv_impl
295+
libc.src.__support.FPUtil.fp_bits
296+
libc.src.__support.FPUtil.polyeval
297+
libc.src.__support.FPUtil.cast
298+
libc.src.__support.FPUtil.except_value_utils
299+
libc.src.__support.FPUtil.multiply_add
300+
libc.src.__support.macros.config
301+
libc.src.__support.macros.optimization
302+
)
303+
289304
add_header_library(
290305
asinf
291306
HDRS

libc/src/__support/math/atanhf16.h

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
//===-- Implementation header for atanhf16 ----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF16_H
10+
#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "src/__support/FPUtil/FEnvImpl.h"
17+
#include "src/__support/FPUtil/FPBits.h"
18+
#include "src/__support/FPUtil/PolyEval.h"
19+
#include "src/__support/FPUtil/cast.h"
20+
#include "src/__support/FPUtil/except_value_utils.h"
21+
#include "src/__support/FPUtil/multiply_add.h"
22+
#include "src/__support/macros/config.h"
23+
#include "src/__support/macros/optimization.h"
24+
25+
namespace LIBC_NAMESPACE_DECL {
26+
27+
namespace math {
28+
29+
namespace atanhf16_internal {
30+
31+
// Lookup table for 1/f = 1 / (1 + n*2^(-7)) where n = 0..127,
32+
// computed and stored as float precision constants.
33+
// Generated by Sollya with the following commands:
34+
// display = hexadecimal;
35+
// for n from 0 to 127 do { print(single(1 / (1 + n / 128.0))); };
36+
static constexpr float ONE_OVER_F_FLOAT[128] = {
37+
0x1p0f, 0x1.fc07fp-1f, 0x1.f81f82p-1f, 0x1.f4465ap-1f,
38+
0x1.f07c2p-1f, 0x1.ecc07cp-1f, 0x1.e9131ap-1f, 0x1.e573acp-1f,
39+
0x1.e1e1e2p-1f, 0x1.de5d6ep-1f, 0x1.dae608p-1f, 0x1.d77b66p-1f,
40+
0x1.d41d42p-1f, 0x1.d0cb58p-1f, 0x1.cd8568p-1f, 0x1.ca4b3p-1f,
41+
0x1.c71c72p-1f, 0x1.c3f8fp-1f, 0x1.c0e07p-1f, 0x1.bdd2b8p-1f,
42+
0x1.bacf92p-1f, 0x1.b7d6c4p-1f, 0x1.b4e81cp-1f, 0x1.b20364p-1f,
43+
0x1.af286cp-1f, 0x1.ac5702p-1f, 0x1.a98ef6p-1f, 0x1.a6d01ap-1f,
44+
0x1.a41a42p-1f, 0x1.a16d4p-1f, 0x1.9ec8eap-1f, 0x1.9c2d14p-1f,
45+
0x1.99999ap-1f, 0x1.970e5p-1f, 0x1.948b1p-1f, 0x1.920fb4p-1f,
46+
0x1.8f9c18p-1f, 0x1.8d3018p-1f, 0x1.8acb9p-1f, 0x1.886e6p-1f,
47+
0x1.861862p-1f, 0x1.83c978p-1f, 0x1.818182p-1f, 0x1.7f406p-1f,
48+
0x1.7d05f4p-1f, 0x1.7ad22p-1f, 0x1.78a4c8p-1f, 0x1.767dcep-1f,
49+
0x1.745d18p-1f, 0x1.724288p-1f, 0x1.702e06p-1f, 0x1.6e1f76p-1f,
50+
0x1.6c16c2p-1f, 0x1.6a13cep-1f, 0x1.681682p-1f, 0x1.661ec6p-1f,
51+
0x1.642c86p-1f, 0x1.623fa8p-1f, 0x1.605816p-1f, 0x1.5e75bcp-1f,
52+
0x1.5c9882p-1f, 0x1.5ac056p-1f, 0x1.58ed24p-1f, 0x1.571ed4p-1f,
53+
0x1.555556p-1f, 0x1.539094p-1f, 0x1.51d07ep-1f, 0x1.501502p-1f,
54+
0x1.4e5e0ap-1f, 0x1.4cab88p-1f, 0x1.4afd6ap-1f, 0x1.49539ep-1f,
55+
0x1.47ae14p-1f, 0x1.460cbcp-1f, 0x1.446f86p-1f, 0x1.42d662p-1f,
56+
0x1.414142p-1f, 0x1.3fb014p-1f, 0x1.3e22ccp-1f, 0x1.3c995ap-1f,
57+
0x1.3b13b2p-1f, 0x1.3991c2p-1f, 0x1.381382p-1f, 0x1.3698ep-1f,
58+
0x1.3521dp-1f, 0x1.33ae46p-1f, 0x1.323e34p-1f, 0x1.30d19p-1f,
59+
0x1.2f684cp-1f, 0x1.2e025cp-1f, 0x1.2c9fb4p-1f, 0x1.2b404ap-1f,
60+
0x1.29e412p-1f, 0x1.288b02p-1f, 0x1.27350cp-1f, 0x1.25e228p-1f,
61+
0x1.24924ap-1f, 0x1.234568p-1f, 0x1.21fb78p-1f, 0x1.20b47p-1f,
62+
0x1.1f7048p-1f, 0x1.1e2ef4p-1f, 0x1.1cf06ap-1f, 0x1.1bb4a4p-1f,
63+
0x1.1a7b96p-1f, 0x1.194538p-1f, 0x1.181182p-1f, 0x1.16e068p-1f,
64+
0x1.15b1e6p-1f, 0x1.1485fp-1f, 0x1.135c82p-1f, 0x1.12358ep-1f,
65+
0x1.111112p-1f, 0x1.0fef02p-1f, 0x1.0ecf56p-1f, 0x1.0db20ap-1f,
66+
0x1.0c9714p-1f, 0x1.0b7e6ep-1f, 0x1.0a681p-1f, 0x1.0953f4p-1f,
67+
0x1.08421p-1f, 0x1.07326p-1f, 0x1.0624dep-1f, 0x1.05198p-1f,
68+
0x1.041042p-1f, 0x1.03091cp-1f, 0x1.020408p-1f, 0x1.010102p-1f};
69+
70+
// Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127,
71+
// computed and stored as float precision constants.
72+
// Generated by Sollya with the following commands:
73+
// display = hexadecimal;
74+
// for n from 0 to 127 do { print(single(log(1 + n / 128.0))); };
75+
static constexpr float LOG_F_FLOAT[128] = {
76+
0.0f, 0x1.fe02a6p-8f, 0x1.fc0a8cp-7f, 0x1.7b91bp-6f,
77+
0x1.f829bp-6f, 0x1.39e87cp-5f, 0x1.77459p-5f, 0x1.b42dd8p-5f,
78+
0x1.f0a30cp-5f, 0x1.16536ep-4f, 0x1.341d7ap-4f, 0x1.51b074p-4f,
79+
0x1.6f0d28p-4f, 0x1.8c345ep-4f, 0x1.a926d4p-4f, 0x1.c5e548p-4f,
80+
0x1.e27076p-4f, 0x1.fec914p-4f, 0x1.0d77e8p-3f, 0x1.1b72aep-3f,
81+
0x1.29553p-3f, 0x1.371fc2p-3f, 0x1.44d2b6p-3f, 0x1.526e5ep-3f,
82+
0x1.5ff308p-3f, 0x1.6d60fep-3f, 0x1.7ab89p-3f, 0x1.87fa06p-3f,
83+
0x1.9525aap-3f, 0x1.a23bc2p-3f, 0x1.af3c94p-3f, 0x1.bc2868p-3f,
84+
0x1.c8ff7cp-3f, 0x1.d5c216p-3f, 0x1.e27076p-3f, 0x1.ef0adcp-3f,
85+
0x1.fb9186p-3f, 0x1.04025ap-2f, 0x1.0a324ep-2f, 0x1.1058cp-2f,
86+
0x1.1675cap-2f, 0x1.1c898cp-2f, 0x1.22942p-2f, 0x1.2895a2p-2f,
87+
0x1.2e8e2cp-2f, 0x1.347ddap-2f, 0x1.3a64c6p-2f, 0x1.404308p-2f,
88+
0x1.4618bcp-2f, 0x1.4be5fap-2f, 0x1.51aad8p-2f, 0x1.576772p-2f,
89+
0x1.5d1bdcp-2f, 0x1.62c83p-2f, 0x1.686c82p-2f, 0x1.6e08eap-2f,
90+
0x1.739d8p-2f, 0x1.792a56p-2f, 0x1.7eaf84p-2f, 0x1.842d1ep-2f,
91+
0x1.89a338p-2f, 0x1.8f11e8p-2f, 0x1.947942p-2f, 0x1.99d958p-2f,
92+
0x1.9f323ep-2f, 0x1.a4840ap-2f, 0x1.a9cecap-2f, 0x1.af1294p-2f,
93+
0x1.b44f78p-2f, 0x1.b9858ap-2f, 0x1.beb4dap-2f, 0x1.c3dd7ap-2f,
94+
0x1.c8ff7cp-2f, 0x1.ce1afp-2f, 0x1.d32fe8p-2f, 0x1.d83e72p-2f,
95+
0x1.dd46ap-2f, 0x1.e24882p-2f, 0x1.e74426p-2f, 0x1.ec399ep-2f,
96+
0x1.f128f6p-2f, 0x1.f6124p-2f, 0x1.faf588p-2f, 0x1.ffd2ep-2f,
97+
0x1.02552ap-1f, 0x1.04bdfap-1f, 0x1.0723e6p-1f, 0x1.0986f4p-1f,
98+
0x1.0be72ep-1f, 0x1.0e4498p-1f, 0x1.109f3ap-1f, 0x1.12f71ap-1f,
99+
0x1.154c3ep-1f, 0x1.179eacp-1f, 0x1.19ee6cp-1f, 0x1.1c3b82p-1f,
100+
0x1.1e85f6p-1f, 0x1.20cdcep-1f, 0x1.23130ep-1f, 0x1.2555bcp-1f,
101+
0x1.2795e2p-1f, 0x1.29d38p-1f, 0x1.2c0e9ep-1f, 0x1.2e4744p-1f,
102+
0x1.307d74p-1f, 0x1.32b134p-1f, 0x1.34e28ap-1f, 0x1.37117cp-1f,
103+
0x1.393e0ep-1f, 0x1.3b6844p-1f, 0x1.3d9026p-1f, 0x1.3fb5b8p-1f,
104+
0x1.41d8fep-1f, 0x1.43f9fep-1f, 0x1.4618bcp-1f, 0x1.48353ep-1f,
105+
0x1.4a4f86p-1f, 0x1.4c679ap-1f, 0x1.4e7d82p-1f, 0x1.50913cp-1f,
106+
0x1.52a2d2p-1f, 0x1.54b246p-1f, 0x1.56bf9ep-1f, 0x1.58cadcp-1f,
107+
0x1.5ad404p-1f, 0x1.5cdb1ep-1f, 0x1.5ee02ap-1f, 0x1.60e33p-1f};
108+
109+
// Approximates the natural logarithm of x in float precision: a table lookup
// on the leading 7 mantissa bits (ONE_OVER_F_FLOAT / LOG_F_FLOAT) plus a
// polynomial correction for the remaining low mantissa bits.
// x should be positive, normal finite value
// NOTE(review): atanhf16 passes (xf + 1.0f) / (xf - 1.0f), which is negative
// for |xf| < 1. The mask applied to xbits below clears the sign bit, so this
// looks like it effectively evaluates log(|x|) -- confirm that get_exponent()
// and get_mantissa() ignore the sign as well.
110+
// TODO: Simplify range reduction and polynomial degree for float16.
111+
// See issue #137190.
112+
LIBC_INLINE static constexpr float log_eval_f(float x) {
113+
// For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx).
114+
using FPBits = fputil::FPBits<float>;
115+
FPBits xbits(x);
116+
117+
// Exponent of x, converted to float so it can feed the final multiply-add.
float ex = static_cast<float>(xbits.get_exponent());
118+
// p1 is the leading 7 bits of mx, i.e.
119+
// p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7).
// p1 is also the index used for both 128-entry lookup tables below.
120+
int p1 = static_cast<int>(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7));
121+
122+
// Set bits to (1 + (mx - p1*2^(-7)))
// The mask keeps only the low (FRACTION_LEN - 7) fraction bits, dropping the
// sign, the exponent and the 7 leading fraction bits; the exponent is then
// reset to the bias so the value is read back as 1.xxx.
123+
xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7));
124+
xbits.set_biased_exponent(FPBits::EXP_BIAS);
125+
// dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)).
// The division is done as a multiplication by the tabulated reciprocal.
126+
float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1];
127+
128+
// Minimax polynomial for log(1 + dx), generated using Sollya:
129+
// > P = fpminimax(log(1 + x)/x, 6, [|SG...|], [0, 2^-7]);
130+
// > Q = (P - 1) / x;
131+
// > for i from 0 to degree(Q) do print(coeff(Q, i));
132+
constexpr float COEFFS[6] = {-0x1p-1f, 0x1.555556p-2f, -0x1.00022ep-2f,
133+
0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f};
134+
135+
float dx2 = dx * dx;
136+
137+
// Pair up adjacent coefficients: c_k = COEFFS[2k-2] + dx * COEFFS[2k-1].
float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
138+
float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
139+
float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);
140+
141+
// Presumably evaluates dx + dx2 * (c1 + dx2 * (c2 + dx2 * c3)), i.e. the
// approximation of log(1 + dx) -- confirm against fputil::polyeval.
float p = fputil::polyeval(dx2, dx, c1, c2, c3);
142+
143+
// Generated by Sollya with the following commands:
144+
// > display = hexadecimal;
145+
// > round(log(2), SG, RN);
146+
constexpr float LOGF_2 = 0x1.62e43p-1f;
147+
148+
// Combine the three pieces of the decomposition above:
// ex * log(2) + (log(1 + p1*2^(-7)) + p), assuming multiply_add(a, b, c)
// computes a * b + c.
float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p);
149+
return result;
150+
}
151+
152+
} // namespace atanhf16_internal
153+
154+
// Computes the inverse hyperbolic tangent of a float16 value.
// Special cases handled below:
//   - signaling NaN: raises FE_INVALID and returns a quiet NaN;
//   - quiet NaN: returned unchanged;
//   - |x| == 1: sets errno to ERANGE, raises FE_DIVBYZERO and returns an
//     infinity carrying the sign of x;
//   - |x| > 1 (including infinities): sets errno to EDOM, raises FE_INVALID
//     and returns a quiet NaN;
//   - +/-0: returned unchanged.
// All other inputs are evaluated in float and converted back to float16.
LIBC_INLINE static constexpr float16 atanhf16(float16 x) {
155+
constexpr size_t N_EXCEPTS = 1;
156+
constexpr fputil::ExceptValues<float16, N_EXCEPTS> ATANHF16_EXCEPTS{{
157+
// (input, RZ output, RU offset, RD offset, RN offset)
158+
// x = 0x1.a5cp-4, atanhf16(x) = 0x1.a74p-4 (RZ)
159+
{0x2E97, 0x2E9D, 1, 0, 0},
160+
}};
161+
162+
using namespace atanhf16_internal;
163+
using FPBits = fputil::FPBits<float16>;
164+
165+
FPBits xbits(x);
166+
Sign sign = xbits.sign();
167+
// Bit pattern of |x|; the range checks below compare it against float16
// encodings directly.
uint16_t x_abs = xbits.abs().uintval();
168+
169+
// |x| >= 1, infinity, or NaN (0x3c00 is the float16 encoding of 1.0)
170+
if (LIBC_UNLIKELY(x_abs >= 0x3c00U)) {
171+
if (xbits.is_nan()) {
172+
if (xbits.is_signaling_nan()) {
173+
fputil::raise_except_if_required(FE_INVALID);
174+
return FPBits::quiet_nan().get_val();
175+
}
176+
return x;
177+
}
178+
179+
// |x| == 1.0
180+
if (x_abs == 0x3c00U) {
181+
fputil::set_errno_if_required(ERANGE);
182+
fputil::raise_except_if_required(FE_DIVBYZERO);
183+
return FPBits::inf(sign).get_val();
184+
}
185+
// |x| > 1.0
186+
fputil::set_errno_if_required(EDOM);
187+
fputil::raise_except_if_required(FE_INVALID);
188+
return FPBits::quiet_nan().get_val();
189+
}
190+
191+
// Inputs listed in ATANHF16_EXCEPTS get their tabulated result instead of
// going through the approximations below. The lookup uses the full bit
// pattern of x, sign included.
if (auto r = ATANHF16_EXCEPTS.lookup(xbits.uintval());
192+
LIBC_UNLIKELY(r.has_value()))
193+
return r.value();
194+
195+
// For |x| <= 0x1.fccp-3 (approximately 0.248)
196+
if (LIBC_UNLIKELY(x_abs <= 0x33f3U)) {
197+
// atanh(+/-0) = +/-0
198+
if (LIBC_UNLIKELY(x_abs == 0U))
199+
return x;
200+
// The Taylor expansion of atanh(x) is:
201+
// atanh(x) = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + x^11/11
202+
// = x * [1 + x^2/3 + x^4/5 + x^6/7 + x^8/9 + x^10/11]
203+
// When |x| < 2^-13 (0x0800U), this can be approximated by:
// (0x0800 has biased exponent 2 and a zero fraction, i.e. 2^(2 - 15).)
204+
// atanh(x) ≈ x + (1/3)*x^3
205+
if (LIBC_UNLIKELY(x_abs < 0x0800U)) {
206+
float xf = x;
207+
return fputil::cast<float16>(xf + 0x1.555556p-2f * xf * xf * xf);
208+
}
209+
210+
// For 2^-13 <= |x| <= 0x1.fccp-3 (~0.248):
211+
// Let t = x^2.
212+
// Define P(t) ≈ (1/3)*t + (1/5)*t^2 + (1/7)*t^3 + (1/9)*t^4 + (1/11)*t^5.
213+
// Coefficients (from Sollya, RN, hexadecimal):
214+
// 1/3 = 0x1.555556p-2, 1/5 = 0x1.99999ap-3, 1/7 = 0x1.24924ap-3,
215+
// 1/9 = 0x1.c71c72p-4, 1/11 = 0x1.745d18p-4
216+
// Thus, atanh(x) ≈ x * (1 + P(x^2)).
217+
float xf = x;
218+
float x2 = xf * xf;
219+
// The leading 0.0f is the constant term of P, so pe = P(x^2).
float pe = fputil::polyeval(x2, 0.0f, 0x1.555556p-2f, 0x1.99999ap-3f,
220+
0x1.24924ap-3f, 0x1.c71c72p-4f, 0x1.745d18p-4f);
221+
// xf * pe + xf = xf * (1 + P(x^2)), assuming multiply_add(a, b, c) computes
// a * b + c.
return fputil::cast<float16>(fputil::multiply_add(xf, pe, xf));
222+
}
223+
224+
// Remaining range (|x| above ~0.248 and below 1):
// atanh(x) = 0.5 * log((1 + x) / (1 - x)).
// The argument is written as (xf + 1) / (xf - 1), which is the negation of
// (1 + x) / (1 - x); log_eval_f masks off the sign bit of its input, so this
// presumably yields the same logarithm -- confirm against log_eval_f.
// NOTE(review): `0.5` is a double literal, so the product is computed in
// double before the cast; `0.5f` would keep it in float -- confirm whether
// this is intentional.
float xf = x;
225+
return fputil::cast<float16>(0.5 * log_eval_f((xf + 1.0f) / (xf - 1.0f)));
226+
}
227+
228+
} // namespace math
229+
230+
} // namespace LIBC_NAMESPACE_DECL
231+
232+
#endif // LIBC_TYPES_HAS_FLOAT16
233+
234+
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF16_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3932,17 +3932,7 @@ add_entrypoint_object(
39323932
HDRS
39333933
../atanhf16.h
39343934
DEPENDS
3935-
.explogxf
3936-
libc.hdr.errno_macros
3937-
libc.hdr.fenv_macros
3938-
libc.src.__support.FPUtil.cast
3939-
libc.src.__support.FPUtil.except_value_utils
3940-
libc.src.__support.FPUtil.fenv_impl
3941-
libc.src.__support.FPUtil.fp_bits
3942-
libc.src.__support.FPUtil.multiply_add
3943-
libc.src.__support.FPUtil.polyeval
3944-
libc.src.__support.macros.optimization
3945-
libc.src.__support.macros.properties.types
3935+
libc.src.__support.math.atanhf16
39463936
)
39473937

39483938
add_entrypoint_object(

0 commit comments

Comments
 (0)